Merge pull request !5716 from ZPaC/master-unify-float-to-int-cast
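This change unifies the float-to-int casts on the parameter-server path for sparse optimizers: embedding indices now travel as int end-to-end, so the server-side loops that re-read them as float and static_cast each element back to int are replaced with direct byte copies via memcpy_s. It also stops hardcoding the FTRL accumulator's initial value to 1.0 on the server; the worker attaches the optimizer's initial_accum as an "init_accum" attribute on the Push primitive, and SparseApplyFtrlPSKernel reads, validates, and exposes it to the optimizer-info builder.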
@@ -48,6 +48,10 @@ void SparseApplyFtrlPSKernel::InitKernel(
   if (grad_shape[0] != indices_size_) {
     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
   }
+  init_accum_ = AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
+  if (init_accum_ < 0) {
+    MS_LOG(EXCEPTION) << "init_accum should be a non-negative scalar";
+  }
   lr_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr");
   if (lr_ <= 0) {
     MS_LOG(EXCEPTION) << "lr should be a positive scalar";
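The new attribute is fetched with the same AnfAlgo::GetNodeAttr<float> pattern used for lr just below, and rejected when negative, matching the constraint that the FTRL accumulator must start from a non-negative value.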
@@ -28,7 +28,7 @@ using mindspore::kernel::SparseApplyFtrlCPUKernel;
 class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerKernel {
  public:
   SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num, size_t worker_num)
-      : PServerKernel(rank_id, pserver_num, worker_num) {}
+      : PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
   ~SparseApplyFtrlPSKernel() override = default;
   void InitKernel(const CNodePtr &cnode,
@@ -41,9 +41,11 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK
   const std::vector<size_t> &input_sizes() const override;
   const std::vector<size_t> &output_sizes() const override;
   const std::vector<size_t> &workspace_sizes() const override;
+  const float init_accum() const { return init_accum_; }

  protected:
   void ReInit(const std::vector<AddressPtr> &) override;
+  float init_accum_;
 };
 }  // namespace ps
 }  // namespace kernel
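The member is default-initialized to 0.1, which matches the default of the initial_accum argument of nn.FTRL on the Python side, so the kernel holds a sane value before InitKernel overwrites it. (The const on the getter's float return type is a no-op; a scalar returned by value is effectively non-const.)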
@@ -100,16 +100,11 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   for (size_t i = 0; i < indices_index; i++) {
     indice_offset += lengths[i];
   }
-  float *incr_indice_data = values.data() + indice_offset;
+  int *incr_indice_data = reinterpret_cast<int *>(values.data()) + indice_offset;
   size_t incr_indice_size = lengths[indices_index];
   size_t incr_indice_data_size = incr_indice_size * sizeof(int);
-  std::vector<int> converted_indices(incr_indice_size);
-  for (size_t i = 0; i < incr_indice_size; i++) {
-    converted_indices[i] = static_cast<int>(incr_indice_data[i]);
-  }
-  auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, converted_indices.data(),
-                       incr_indice_data_size);
+  auto ret2 =
+    memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, incr_indice_data, incr_indice_data_size);
   if (ret2 != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
   }
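Before this change the worker widened each int index to float in BuildSparseValue and the server narrowed it back with static_cast, a round trip that is redundant and lossy for indices above 2^24 (a float carries only 24 significand bits). Now the raw int bytes travel in the float-typed wire buffer unchanged, and both sides copy them verbatim; the key point is that once the bytes encode ints, they must be recovered with reinterpret_cast plus a byte copy, not read back as float. The same direct-copy replacement recurs in the SparseAdam and SparseFtrl builders below. A minimal standalone sketch of the distinction, using standard memcpy in place of memcpy_s; the names are illustrative, not MindSpore code:

```cpp
// Standalone sketch (not MindSpore code) of why the element-wise conversion
// had to go once indices travel as raw int bytes.
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  // The worker writes raw int indices into a float-typed wire buffer.
  std::vector<float> values(4);
  const int raw[] = {7, 3, 42, 9};
  std::memcpy(values.data(), raw, sizeof(raw));

  // Old approach applied to this buffer: reading the bytes *as floats* first
  // decodes int 7's bit pattern as a denormal near zero, so the cast yields 0.
  std::cout << static_cast<int>(values[0]) << '\n';  // prints 0, not 7

  // New approach: reinterpret the buffer and copy the bytes verbatim.
  const int *indices = reinterpret_cast<const int *>(values.data());
  std::vector<int> out(4);
  std::memcpy(out.data(), indices, out.size() * sizeof(int));
  for (int v : out) std::cout << v << ' ';  // prints 7 3 42 9
  return 0;
}
```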
@@ -18,14 +18,16 @@
 #include <vector>
 #include <memory>
 #include <functional>
+#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"

 namespace mindspore {
 namespace parallel {
 namespace ps {
+using mindspore::kernel::ps::SparseApplyFtrlPSKernel;
 OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel> &pserver_kernel,
                                            const WeightPtr &weight, const Keys &keys, const Values &values,
                                            const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) {
-  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num);
+  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel);
   std::vector<size_t> ws_sizes = pserver_kernel->workspace_sizes();
   BuildWorkspaces(optim_info, ws_sizes, worker_num);
   BuildOutputs(optim_info, worker_num);
@@ -45,7 +47,7 @@ void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vecto
 OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                      const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                     size_t worker_num) {
+                                                     size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -74,7 +76,7 @@ OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, co
 OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -140,13 +142,9 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(int), std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[7]);
   size_t indices_data_size = lens[7] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6];
-  for (int i = 0; i < lens[7]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(epsilon->addr) + lens[5] + lens[6];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -158,7 +156,8 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
 OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num,
+                                                       const std::shared_ptr<PServerKernel> &pserver_kernel) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -167,7 +166,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
   accum->size = weight->size() * sizeof(float);
   for (size_t i = 0; i < weight->size(); i++) {
     float *tmp = reinterpret_cast<float *>(accum->addr);
-    tmp[i] = 1.0;
+    tmp[i] = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(pserver_kernel)->init_accum();
   }
   AddressPtr linear = std::make_shared<kernel::Address>();
   linear->addr = new float[weight->size()];
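One caveat with the replacement line: std::dynamic_pointer_cast returns an empty pointer when pserver_kernel is not actually a SparseApplyFtrlPSKernel, and the cast is repeated on every loop iteration. A standalone sketch of a defensive variant that hoists the cast and guards the null case; the two structs below are minimal stand-ins for the real kernel classes, not MindSpore code:

```cpp
// Standalone sketch: hoist the dynamic_pointer_cast out of the loop and
// null-check it before dereferencing.
#include <iostream>
#include <memory>
#include <vector>

struct PServerKernel {
  virtual ~PServerKernel() = default;
};
struct SparseApplyFtrlPSKernel : PServerKernel {
  float init_accum() const { return 0.1f; }  // stand-in for the diff's getter
};

int main() {
  std::shared_ptr<PServerKernel> pserver_kernel = std::make_shared<SparseApplyFtrlPSKernel>();

  auto ftrl = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(pserver_kernel);
  if (ftrl == nullptr) {
    std::cerr << "kernel is not SparseApplyFtrlPSKernel\n";  // MS_LOG(EXCEPTION) in-tree
    return 1;
  }
  const float init_accum = ftrl->init_accum();  // cast once, not per element

  std::vector<float> accum(8);
  for (float &a : accum) a = init_accum;
  std::cout << accum[0] << '\n';  // 0.1
  return 0;
}
```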
@@ -192,13 +191,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[1]);
   size_t indices_data_size = lens[1] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(values.data()) + lens[0];
-  for (int i = 0; i < lens[1]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(values.data()) + lens[0];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -38,7 +38,8 @@ class OptimizerInfoBuilder {
                         size_t worker_num);
   virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
-                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) = 0;
+                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
+                                     const std::shared_ptr<PServerKernel> &pserver_kernel) = 0;
   virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t worker_num);
   virtual void BuildOutputs(OptimizerInfo *info, size_t worker_num) {}
@@ -47,19 +48,22 @@ class OptimizerInfoBuilder {
 class MomentumOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shape, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shape, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };

 class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };

 class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };
 }  // namespace ps
 }  // namespace parallel
@@ -571,11 +571,7 @@ void WorkerProxy<T>::BuildSparseValue(const ::ps::SArray<int> &lengths, const si
   int indice_offset = grad_offset + lengths[grad_index];
   data_size = lengths[indice_index] * sizeof(T);
   T *indice_data = reduced_data->data() + indice_offset;
-  std::vector<T> convert(lengths[indice_index]);
-  for (int i = 0; i < lengths[indice_index]; i++) {
-    convert[i] = static_cast<T>(indices[i]);
-  }
-  ret = memcpy_s(indice_data, data_size, convert.data(), data_size);
+  ret = memcpy_s(indice_data, data_size, indices, data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
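Note that data_size here is lengths[indice_index] * sizeof(T) while indices holds int, so the verbatim copy relies on sizeof(T) == sizeof(int); that holds for the float/int pairing this path uses, but a wider T would change the semantics.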
@@ -162,6 +162,7 @@ class FTRL(Optimizer):
         self.sparse_opt = P.FusedSparseFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
         self._ps_pull = P.Pull()
         self._ps_push = P.Push("Ftrl", [0, 1, 2])
+        self._ps_push.add_prim_attr("init_accum", initial_accum)
         self._ps_push.add_prim_attr("lr", learning_rate)
         self._ps_push.add_prim_attr("l1", l1)
         self._ps_push.add_prim_attr("l2", l2)