Merge pull request !5716 from ZPaC/master-unify-float-to-int-cast
@@ -48,6 +48,10 @@ void SparseApplyFtrlPSKernel::InitKernel(
   if (grad_shape[0] != indices_size_) {
     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
   }
+  init_accum_ = AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
+  if (init_accum_ < 0) {
+    MS_LOG(EXCEPTION) << "init_accum should be a non-negative scalar";
+  }
   lr_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr");
   if (lr_ <= 0) {
     MS_LOG(EXCEPTION) << "lr should be a positive scalar";
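Note on the hunk above: the PS kernel now pulls init_accum from the node's attributes and rejects negative values before lr is validated, so a bad accumulator seed fails fast on the server. A minimal, self-contained sketch of this validate-on-read pattern; a plain map stands in for the AnfAlgo attribute store, and all names here are illustrative rather than repo code:

#include <map>
#include <stdexcept>
#include <string>

// Reads a float attribute and enforces a lower bound, throwing on violation,
// in the spirit of the init_accum_/lr_ checks above. GetNodeAttr in the repo
// also throws when the attribute is missing; the map lookup mimics that.
float GetCheckedAttr(const std::map<std::string, float> &attrs, const std::string &name,
                     float lower_bound, bool bound_is_exclusive) {
  auto it = attrs.find(name);
  if (it == attrs.end()) {
    throw std::runtime_error("attribute not set: " + name);
  }
  float v = it->second;
  if (v < lower_bound || (bound_is_exclusive && v == lower_bound)) {
    throw std::runtime_error(name + " is out of range");
  }
  return v;  // e.g. init_accum needs v >= 0, lr needs v > 0
}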
@@ -28,7 +28,7 @@ using mindspore::kernel::SparseApplyFtrlCPUKernel;
 class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerKernel {
  public:
   SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num, size_t worker_num)
-      : PServerKernel(rank_id, pserver_num, worker_num) {}
+      : PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
   ~SparseApplyFtrlPSKernel() override = default;
   void InitKernel(const CNodePtr &cnode,
@@ -41,9 +41,11 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK
   const std::vector<size_t> &input_sizes() const override;
   const std::vector<size_t> &output_sizes() const override;
   const std::vector<size_t> &workspace_sizes() const override;
+  const float init_accum() const { return init_accum_; }

  protected:
   void ReInit(const std::vector<AddressPtr> &) override;
+  float init_accum_;
 };
 }  // namespace ps
 }  // namespace kernel
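The header change pairs a member default (init_accum_(0.1) in the constructor's init list) with a read-only accessor, so builders can query the seed without knowing how it was set. One nit worth flagging: in `const float init_accum() const`, the leading const on a by-value return type has no effect; plain `float` is the idiomatic spelling. A hedged sketch of the same pattern, with illustrative names rather than the repo class:

#include <cstddef>

class KernelWithSeed {
 public:
  explicit KernelWithSeed(size_t rank_id) : rank_id_(rank_id), init_accum_(0.1f) {}
  // Plain `float` return: the value is copied out, so a const qualifier on it
  // would be meaningless. The trailing const is what matters for callers
  // holding a const reference.
  float init_accum() const { return init_accum_; }

 private:
  size_t rank_id_;
  float init_accum_;  // 0.1 until InitKernel overwrites it from the attribute
};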
@@ -100,16 +100,11 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   for (size_t i = 0; i < indices_index; i++) {
     indice_offset += lengths[i];
   }
-  float *incr_indice_data = values.data() + indice_offset;
+  int *incr_indice_data = reinterpret_cast<int *>(values.data()) + indice_offset;
   size_t incr_indice_size = lengths[indices_index];
   size_t incr_indice_data_size = incr_indice_size * sizeof(int);
-  std::vector<int> converted_indices(incr_indice_size);
-  for (size_t i = 0; i < incr_indice_size; i++) {
-    converted_indices[i] = static_cast<int>(incr_indice_data[i]);
-  }
-  auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, converted_indices.data(),
-                       incr_indice_data_size);
+  auto ret2 =
+    memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, incr_indice_data, incr_indice_data_size);
   if (ret2 != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
   }
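The heart of the PR is visible in this hunk: indices used to travel inside the float-typed values buffer and were converted element by element with static_cast<int>; now that segment of the wire format already carries int32 bits, so a reinterpret_cast plus one block copy suffices. The same conversion-loop deletion repeats in the SparseAdam and SparseFtrl builders and in WorkerProxy<T>::BuildSparseValue further down. A minimal sketch of the unified path, with std::memcpy standing in for the securec memcpy_s the repo uses (memcpy_s additionally takes the destination capacity and returns an error code):

#include <cstring>
#include <vector>

// Copies `count` int32 indices out of the flat float-typed values buffer.
// The segment starting at `offset` is assumed to hold int bits already, so
// no per-element float -> int conversion is needed.
void CopyIndices(const std::vector<float> &values, size_t offset, size_t count, int *dest) {
  const int *src = reinterpret_cast<const int *>(values.data()) + offset;
  std::memcpy(dest, src, count * sizeof(int));  // byte copy, no conversion
}

Since memcpy copies raw bytes through a void pointer, the reinterpreted pointer is never dereferenced as an int lvalue, which keeps the copy itself clear of strict-aliasing trouble even though the storage is float-typed.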
@@ -18,14 +18,16 @@
 #include <vector>
 #include <memory>
 #include <functional>
+#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"

 namespace mindspore {
 namespace parallel {
 namespace ps {
+using mindspore::kernel::ps::SparseApplyFtrlPSKernel;
 OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel> &pserver_kernel,
                                            const WeightPtr &weight, const Keys &keys, const Values &values,
                                            const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) {
-  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num);
+  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel);
   std::vector<size_t> ws_sizes = pserver_kernel->workspace_sizes();
   BuildWorkspaces(optim_info, ws_sizes, worker_num);
   BuildOutputs(optim_info, worker_num);
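Build() itself changes at a single call site: the kernel handle it already receives is now forwarded into the BuildInputs() hook, so per-optimizer builders can consult kernel state (the FTRL builder further down reads init_accum()). A toy model of the threading, with all names illustrative:

#include <iostream>
#include <memory>

struct KernelStub {  // stands in for PServerKernel plus the FTRL subclass
  float init_accum() const { return 0.1f; }
};

class BuilderSketch {
 public:
  virtual ~BuilderSketch() = default;
  // Template method: forwards the kernel to the hook instead of dropping it.
  void Build(const std::shared_ptr<KernelStub> &kernel) { BuildInputs(kernel); }

 protected:
  virtual void BuildInputs(const std::shared_ptr<KernelStub> &kernel) = 0;
};

class FtrlBuilderSketch : public BuilderSketch {
 protected:
  void BuildInputs(const std::shared_ptr<KernelStub> &kernel) override {
    std::cout << "seeding accum with " << kernel->init_accum() << "\n";
  }
};

Calling FtrlBuilderSketch{}.Build(std::make_shared<KernelStub>()) prints the seed; builders that do not need the kernel simply take the parameter unnamed, as the Momentum and Adam overloads below do.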
@@ -45,7 +47,7 @@ void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vecto
 OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                      const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                     size_t worker_num) {
+                                                     size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -74,7 +76,7 @@ OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, co
 OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -140,13 +142,9 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(int), std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[7]);
   size_t indices_data_size = lens[7] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6];
-  for (int i = 0; i < lens[7]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(epsilon->addr) + lens[5] + lens[6];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -158,7 +156,8 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
 OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num,
+                                                       const std::shared_ptr<PServerKernel> &pserver_kernel) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -167,7 +166,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
   accum->size = weight->size() * sizeof(float);
   for (size_t i = 0; i < weight->size(); i++) {
     float *tmp = reinterpret_cast<float *>(accum->addr);
-    tmp[i] = 1.0;
+    tmp[i] = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(pserver_kernel)->init_accum();
   }
   AddressPtr linear = std::make_shared<kernel::Address>();
   linear->addr = new float[weight->size()];
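Two things are worth noting about the accumulator-seeding hunk above. First, std::dynamic_pointer_cast returns an empty shared_ptr when pserver_kernel is not actually a SparseApplyFtrlPSKernel, and the new code dereferences the result unchecked; second, the cast runs once per weight element inside the loop. A defensive sketch (illustrative names) that checks the cast and hoists the value out of the loop:

#include <memory>
#include <stdexcept>
#include <vector>

struct KernelBase { virtual ~KernelBase() = default; };
struct FtrlKernel : KernelBase {
  float init_accum() const { return 0.1f; }
};

void SeedAccum(const std::shared_ptr<KernelBase> &kernel, std::vector<float> *accum) {
  auto ftrl = std::dynamic_pointer_cast<FtrlKernel>(kernel);
  if (ftrl == nullptr) {
    throw std::runtime_error("kernel is not an FTRL PS kernel");
  }
  const float seed = ftrl->init_accum();  // one RTTI cast, not one per element
  for (float &v : *accum) {
    v = seed;
  }
}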
@@ -192,13 +191,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[1]);
   size_t indices_data_size = lens[1] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(values.data()) + lens[0];
-  for (int i = 0; i < lens[1]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(values.data()) + lens[0];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -38,7 +38,8 @@ class OptimizerInfoBuilder {
                                       size_t worker_num);
   virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
-                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) = 0;
+                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
+                                     const std::shared_ptr<PServerKernel> &pserver_kernel) = 0;
   virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t worker_num);
   virtual void BuildOutputs(OptimizerInfo *info, size_t worker_num) {}
@@ -47,19 +48,22 @@ class OptimizerInfoBuilder {
 class MomentumOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shape, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shape, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };

 class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };

 class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };
 }  // namespace ps
 }  // namespace parallel
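Because BuildInputs() is pure virtual and every subclass declaration carries override, extending the signature in the base class forces all three builders to change in the same commit: a subclass left on the old parameter list no longer overrides anything and fails to compile instead of silently missing dispatch. A toy illustration:

#include <memory>

struct KernelStub {};

class Base {
 public:
  virtual ~Base() = default;
  virtual void BuildInputs(size_t worker_num, const std::shared_ptr<KernelStub> &kernel) = 0;
};

class Updated : public Base {
 public:
  void BuildInputs(size_t worker_num, const std::shared_ptr<KernelStub> &kernel) override {}
};

// class Stale : public Base {
//  public:
//   // error: marked 'override' but does not override any member function
//   void BuildInputs(size_t worker_num) override {}
// };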
@@ -571,11 +571,7 @@ void WorkerProxy<T>::BuildSparseValue(const ::ps::SArray<int> &lengths, const si
   int indice_offset = grad_offset + lengths[grad_index];
   data_size = lengths[indice_index] * sizeof(T);
   T *indice_data = reduced_data->data() + indice_offset;
-  std::vector<T> convert(lengths[indice_index]);
-  for (int i = 0; i < lengths[indice_index]; i++) {
-    convert[i] = static_cast<T>(indices[i]);
-  }
-  ret = memcpy_s(indice_data, data_size, convert.data(), data_size);
+  ret = memcpy_s(indice_data, data_size, indices, data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -162,6 +162,7 @@ class FTRL(Optimizer):
         self.sparse_opt = P.FusedSparseFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
         self._ps_pull = P.Pull()
         self._ps_push = P.Push("Ftrl", [0, 1, 2])
+        self._ps_push.add_prim_attr("init_accum", initial_accum)
         self._ps_push.add_prim_attr("lr", learning_rate)
         self._ps_push.add_prim_attr("l1", l1)
         self._ps_push.add_prim_attr("l2", l2)