Merge pull request !1867 from YuJianfeng/master
@@ -547,5 +547,38 @@ int Sign(float x) {
   }
   return 0;
 }
+
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim) {
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
+  MS_EXCEPTION_IF_NULL(unique_grad);
+  MS_EXCEPTION_IF_NULL(unique_grad->value_);
+  MS_EXCEPTION_IF_NULL(unique_grad->indices_);
+  std::unordered_map<int, size_t> index_map;
+  size_t unique_indices_size = 0;
+  for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
+    int index = origin_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= first_dim) {
+      continue;
+    }
+    auto iter = index_map.find(index);
+    if (iter == index_map.end()) {
+      index_map[index] = unique_indices_size;
+      unique_grad->indices_[unique_indices_size] = index;
+      for (size_t j = unique_indices_size * outer_dim, k = i * outer_dim; j < (unique_indices_size + 1) * outer_dim;
+           ++j, ++k) {
+        unique_grad->value_[j] = origin_sparse_grad.value_[k];
+      }
+      unique_indices_size++;
+    } else {
+      size_t first_index = iter->second;
+      for (size_t j = first_index * outer_dim, k = i * outer_dim; j < (first_index + 1) * outer_dim; ++j, ++k) {
+        unique_grad->value_[j] += origin_sparse_grad.value_[k];
+      }
+    }
+  }
+  unique_grad->indices_size_ = unique_indices_size;
+}
 }  // namespace kernel
 }  // namespace mindspore
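
The helper merges rows of an indexed-slices gradient that share the same index: the first occurrence of an index claims the next output row, later occurrences are accumulated into that row, and out-of-range indices are skipped. A minimal standalone sketch of the same accumulation idea (not part of the patch; the function and variable names here are illustrative only):

#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

// Sum rows of `values` (each row is `outer_dim` floats, addressed by `indices`)
// per unique index, dropping indices outside [0, first_dim).
void DedupRows(const std::vector<int> &indices, const std::vector<float> &values, size_t first_dim, size_t outer_dim,
               std::vector<int> *out_indices, std::vector<float> *out_values) {
  std::unordered_map<int, size_t> row_of_index;  // index -> row position in the output
  for (size_t i = 0; i < indices.size(); ++i) {
    int index = indices[i];
    if (index < 0 || static_cast<size_t>(index) >= first_dim) {
      continue;  // out-of-range index: skipped, as in DeduplicateIndexedSlices
    }
    auto iter = row_of_index.find(index);
    if (iter == row_of_index.end()) {
      // First occurrence: append a copy of the row.
      row_of_index[index] = out_indices->size();
      out_indices->push_back(index);
      out_values->insert(out_values->end(), values.begin() + i * outer_dim, values.begin() + (i + 1) * outer_dim);
    } else {
      // Duplicate: add this row into the existing output row.
      for (size_t k = 0; k < outer_dim; ++k) {
        (*out_values)[iter->second * outer_dim + k] += values[i * outer_dim + k];
      }
    }
  }
}

int main() {
  // Same data as the first unit test below: indices {0, 0, 1, 1, 0, 3}, grad rows 0..11 with outer_dim = 2.
  std::vector<int> indices{0, 0, 1, 1, 0, 3};
  std::vector<float> grad(12);
  for (size_t i = 0; i < grad.size(); ++i) {
    grad[i] = static_cast<float>(i);
  }
  std::vector<int> out_indices;
  std::vector<float> out_values;
  DedupRows(indices, grad, 6, 2, &out_indices, &out_values);
  for (size_t r = 0; r < out_indices.size(); ++r) {
    // Prints "0: 10 13", "1: 10 12", "3: 10 11".
    std::cout << out_indices[r] << ": " << out_values[2 * r] << " " << out_values[2 * r + 1] << std::endl;
  }
  return 0;
}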
@@ -69,6 +69,12 @@ class KernelMeta {
   std::unordered_map<std::string, std::string> kernel_meta_map_;
 };
+
+struct SparseGradient {
+  float *value_;
+  int *indices_;
+  size_t indices_size_;
+};
 bool CheckCache(const std::string &kernel_name);
 KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
 KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
@@ -84,6 +90,8 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info);
 std::string GetProcessor(const AnfNodePtr &anf_node);
 bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
 int Sign(float x);
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim);
 }  // namespace kernel
 }  // namespace mindspore
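
SparseGradient is a non-owning view: it only carries raw pointers plus a row count, so the caller keeps ownership of the backing buffers and has to size them for the worst case (no duplicate indices); DeduplicateIndexedSlices then records the actual number of unique rows in indices_size_. A hedged usage sketch against the declarations above (assuming kernel/common_utils.h is on the include path; the surrounding function and buffer names are illustrative):

#include <vector>
#include "kernel/common_utils.h"

void ReduceDuplicates(float *grad, int *indices, size_t num_rows, size_t first_dim, size_t outer_dim) {
  // Output buffers owned by the caller, sized for the all-unique worst case.
  std::vector<float> summed_grad(num_rows * outer_dim);
  std::vector<int> unique_indices(num_rows);
  mindspore::kernel::SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  mindspore::kernel::DeduplicateIndexedSlices(mindspore::kernel::SparseGradient({grad, indices, num_rows}),
                                              &unique_grad, first_dim, outer_dim);
  // Only the first unique_grad.indices_size_ rows of summed_grad / unique_indices are meaningful now.
}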
@@ -84,28 +84,35 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
   auto grad = reinterpret_cast<float *>(inputs[3]->addr);
   auto indices = reinterpret_cast<int *>(inputs[4]->addr);
-  for (size_t i = 0; i < indices_size_; ++i) {
-    int index = indices[i];
-    if ((size_t)index >= var_first_dim_size_) {
-      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range";
+  std::vector<float> new_grad(indices_size_ * var_outer_dim_size_);
+  std::vector<int> new_indices(indices_size_);
+  SparseGradient unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size_});
+  DeduplicateIndexedSlices(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_,
+                           var_outer_dim_size_);
+  for (size_t i = 0; i < unique_sparse_grad.indices_size_; ++i) {
+    int index = unique_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= var_first_dim_size_) {
+      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
     }
     for (size_t j = var_outer_dim_size_ * index, k = var_outer_dim_size_ * i; j < var_outer_dim_size_ * (index + 1);
          ++j, ++k) {
-      auto accum_new = accum[j] + grad[k] * grad[k];
+      auto summed_grad = unique_sparse_grad.value_[k];
+      auto accum_new = accum[j] + summed_grad * summed_grad;
       if (lr_power_ == -0.5) {
-        linear[j] += grad[k] - (sqrt(accum_new) - sqrt(accum[j])) / lr_ * var[j];
+        linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr_ * var[j];
       } else {
-        linear[j] += grad[k] - (pow(accum_new, -lr_power_) - pow(accum[j], -lr_power_)) / lr_ * var[j];
+        linear[j] += summed_grad - (std::pow(accum_new, -lr_power_) - std::pow(accum[j], -lr_power_)) / lr_ * var[j];
       }
       auto x = Sign(linear[j]) * l1_ - linear[j];
       float y;
       if (lr_power_ == -0.5) {
-        y = sqrt(accum_new) / lr_ + 2 * l2_;
+        y = std::sqrt(accum_new) / lr_ + 2 * l2_;
       } else {
-        y = pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
+        y = std::pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
       }
       auto pre_shrink = x / y;
-      var[j] = abs(linear[j]) > l1_ ? pre_shrink : 0;
+      var[j] = std::fabs(linear[j]) > l1_ ? pre_shrink : 0;
       accum[j] = accum_new;
     }
   }
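
The behavioural change in this hunk: duplicate indices no longer trigger several sequential FTRL steps on the same variable row; their gradient rows are summed first and a single step is applied per unique index. Written out from the loop body above, with g standing for the summed gradient element unique_sparse_grad.value_[k] and p for lr_power_ (the pow terms reduce to sqrt when p = -0.5), the per-element update is:

\[
\begin{aligned}
\mathit{accum}' &= \mathit{accum} + g^{2},\\
\mathit{linear}' &= \mathit{linear} + g - \frac{(\mathit{accum}')^{-p} - \mathit{accum}^{-p}}{\mathit{lr}} \cdot \mathit{var},\\
\mathit{var}' &= \begin{cases}
\dfrac{\mathrm{Sign}(\mathit{linear}') \cdot l_1 - \mathit{linear}'}{(\mathit{accum}')^{-p}/\mathit{lr} + 2\,l_2} & \text{if } |\mathit{linear}'| > l_1,\\
0 & \text{otherwise,}
\end{cases}
\end{aligned}
\]

after which accum is overwritten with accum'.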
@@ -0,0 +1,95 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vector>
#include "common/common_test.h"
#include "kernel/common_utils.h"

namespace mindspore {
namespace kernel {
class CommonUtilTest : public UT::Common {
 public:
  CommonUtilTest() = default;
};

TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest1) {
  // The indices are a vector and the grad is a tensor with shape (6, 2)
  /* 0
   * 0
   * 1
   * 1
   * 0
   * 3
   */
  std::vector<int> indices{0, 0, 1, 1, 0, 3};
  /* 0 1
   * 2 3
   * 4 5
   * 6 7
   * 8 9
   * 10 11
   */
  std::vector<float> grad;
  for (int i = 0; i < 6 * 2; i++) {
    grad.push_back(i);
  }
  std::vector<int> unique_indices(3);
  std::vector<float> summed_grad(6);
  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
  EXPECT_EQ(unique_grad.indices_size_, 3);
  EXPECT_EQ(unique_indices, std::vector<int>({0, 1, 3}));
  /* 10 13
   * 10 12
   * 10 11
   */
  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12, 10, 11}));
}

TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest2) {
  // The indices are a vector and the grad is a tensor with shape (6, 2);
  // index 6 is out of range for first_dim = 6 and is therefore dropped.
  /* 0
   * 0
   * 1
   * 1
   * 0
   * 6
   */
  std::vector<int> indices{0, 0, 1, 1, 0, 6};
  /* 0 1
   * 2 3
   * 4 5
   * 6 7
   * 8 9
   * 10 11
   */
  std::vector<float> grad;
  for (int i = 0; i < 6 * 2; i++) {
    grad.push_back(i);
  }
  std::vector<int> unique_indices(2);
  std::vector<float> summed_grad(4);
  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
  EXPECT_EQ(unique_grad.indices_size_, 2);
  EXPECT_EQ(unique_indices, std::vector<int>({0, 1}));
  /* 10 13
   * 10 12
   */
  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12}));
}
}  // namespace kernel
}  // namespace mindspore