Merge pull request !1867 from YuJianfeng/master
@@ -547,5 +547,38 @@ int Sign(float x) {
   }
   return 0;
 }
+
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim) {
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
+  MS_EXCEPTION_IF_NULL(unique_grad);
+  MS_EXCEPTION_IF_NULL(unique_grad->value_);
+  MS_EXCEPTION_IF_NULL(unique_grad->indices_);
+  std::unordered_map<int, size_t> index_map;
+  size_t unique_indices_size = 0;
+  for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
+    int index = origin_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= first_dim) {
+      continue;
+    }
+    auto iter = index_map.find(index);
+    if (iter == index_map.end()) {
+      index_map[index] = unique_indices_size;
+      unique_grad->indices_[unique_indices_size] = index;
+      for (size_t j = unique_indices_size * outer_dim, k = i * outer_dim; j < (unique_indices_size + 1) * outer_dim;
+           ++j, ++k) {
+        unique_grad->value_[j] = origin_sparse_grad.value_[k];
+      }
+      unique_indices_size++;
+    } else {
+      size_t first_index = iter->second;
+      for (size_t j = first_index * outer_dim, k = i * outer_dim; j < (first_index + 1) * outer_dim; ++j, ++k) {
+        unique_grad->value_[j] += origin_sparse_grad.value_[k];
+      }
+    }
+  }
+  unique_grad->indices_size_ = unique_indices_size;
+}
 }  // namespace kernel
 }  // namespace mindspore
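
The helper merges rows of an indexed-slices gradient that share the same index: the first occurrence of an index claims the next output row, later occurrences are accumulated into that row, and out-of-range indices are skipped. A minimal standalone sketch of the same accumulation idea (not part of the patch; the function and variable names here are illustrative only):

#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

// Sum rows of `values` (each row is `outer_dim` floats, addressed by `indices`)
// per unique index, dropping indices outside [0, first_dim).
void DedupRows(const std::vector<int> &indices, const std::vector<float> &values, size_t first_dim, size_t outer_dim,
               std::vector<int> *out_indices, std::vector<float> *out_values) {
  std::unordered_map<int, size_t> row_of_index;  // index -> row position in the output
  for (size_t i = 0; i < indices.size(); ++i) {
    int index = indices[i];
    if (index < 0 || static_cast<size_t>(index) >= first_dim) {
      continue;  // out-of-range index: skipped, as in DeduplicateIndexedSlices
    }
    auto iter = row_of_index.find(index);
    if (iter == row_of_index.end()) {
      // First occurrence: append a copy of the row.
      row_of_index[index] = out_indices->size();
      out_indices->push_back(index);
      out_values->insert(out_values->end(), values.begin() + i * outer_dim, values.begin() + (i + 1) * outer_dim);
    } else {
      // Duplicate: add this row into the existing output row.
      for (size_t k = 0; k < outer_dim; ++k) {
        (*out_values)[iter->second * outer_dim + k] += values[i * outer_dim + k];
      }
    }
  }
}

int main() {
  // Same data as the first unit test below: indices {0, 0, 1, 1, 0, 3}, grad rows 0..11 with outer_dim = 2.
  std::vector<int> indices{0, 0, 1, 1, 0, 3};
  std::vector<float> grad(12);
  for (size_t i = 0; i < grad.size(); ++i) {
    grad[i] = static_cast<float>(i);
  }
  std::vector<int> out_indices;
  std::vector<float> out_values;
  DedupRows(indices, grad, 6, 2, &out_indices, &out_values);
  for (size_t r = 0; r < out_indices.size(); ++r) {
    // Prints "0: 10 13", "1: 10 12", "3: 10 11".
    std::cout << out_indices[r] << ": " << out_values[2 * r] << " " << out_values[2 * r + 1] << std::endl;
  }
  return 0;
}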
@@ -69,6 +69,12 @@ class KernelMeta {
   std::unordered_map<std::string, std::string> kernel_meta_map_;
 };
+
+struct SparseGradient {
+  float *value_;
+  int *indices_;
+  size_t indices_size_;
+};
 bool CheckCache(const std::string &kernel_name);
 KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
 KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
@@ -84,6 +90,8 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info);
 std::string GetProcessor(const AnfNodePtr &anf_node);
 bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
 int Sign(float x);
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim);
 }  // namespace kernel
 }  // namespace mindspore
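
SparseGradient is a non-owning view: it only carries raw pointers plus a row count, so the caller keeps ownership of the backing buffers and has to size them for the worst case (no duplicate indices); DeduplicateIndexedSlices then records the actual number of unique rows in indices_size_. A hedged usage sketch against the declarations above (assuming kernel/common_utils.h is on the include path; the surrounding function and buffer names are illustrative):

#include <vector>
#include "kernel/common_utils.h"

void ReduceDuplicates(float *grad, int *indices, size_t num_rows, size_t first_dim, size_t outer_dim) {
  // Output buffers owned by the caller, sized for the all-unique worst case.
  std::vector<float> summed_grad(num_rows * outer_dim);
  std::vector<int> unique_indices(num_rows);
  mindspore::kernel::SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  mindspore::kernel::DeduplicateIndexedSlices(mindspore::kernel::SparseGradient({grad, indices, num_rows}),
                                              &unique_grad, first_dim, outer_dim);
  // Only the first unique_grad.indices_size_ rows of summed_grad / unique_indices are meaningful now.
}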
@@ -84,28 +84,35 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
   auto grad = reinterpret_cast<float *>(inputs[3]->addr);
   auto indices = reinterpret_cast<int *>(inputs[4]->addr);
-  for (size_t i = 0; i < indices_size_; ++i) {
-    int index = indices[i];
-    if ((size_t)index >= var_first_dim_size_) {
-      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range";
+  std::vector<float> new_grad(indices_size_ * var_outer_dim_size_);
+  std::vector<int> new_indices(indices_size_);
+  SparseGradient unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size_});
+  DeduplicateIndexedSlices(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_,
+                           var_outer_dim_size_);
+  for (size_t i = 0; i < unique_sparse_grad.indices_size_; ++i) {
+    int index = unique_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= var_first_dim_size_) {
+      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
     }
     for (size_t j = var_outer_dim_size_ * index, k = var_outer_dim_size_ * i; j < var_outer_dim_size_ * (index + 1);
          ++j, ++k) {
-      auto accum_new = accum[j] + grad[k] * grad[k];
+      auto summed_grad = unique_sparse_grad.value_[k];
+      auto accum_new = accum[j] + summed_grad * summed_grad;
       if (lr_power_ == -0.5) {
-        linear[j] += grad[k] - (sqrt(accum_new) - sqrt(accum[j])) / lr_ * var[j];
+        linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr_ * var[j];
       } else {
-        linear[j] += grad[k] - (pow(accum_new, -lr_power_) - pow(accum[j], -lr_power_)) / lr_ * var[j];
+        linear[j] += summed_grad - (std::pow(accum_new, -lr_power_) - std::pow(accum[j], -lr_power_)) / lr_ * var[j];
       }
       auto x = Sign(linear[j]) * l1_ - linear[j];
       float y;
       if (lr_power_ == -0.5) {
-        y = sqrt(accum_new) / lr_ + 2 * l2_;
+        y = std::sqrt(accum_new) / lr_ + 2 * l2_;
       } else {
-        y = pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
+        y = std::pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
       }
       auto pre_shrink = x / y;
-      var[j] = abs(linear[j]) > l1_ ? pre_shrink : 0;
+      var[j] = std::fabs(linear[j]) > l1_ ? pre_shrink : 0;
       accum[j] = accum_new;
     }
   }
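
The behavioural change in this hunk: duplicate indices no longer trigger several sequential FTRL steps on the same variable row; their gradient rows are summed first and a single step is applied per unique index. Written out from the loop body above, with g standing for the summed gradient element unique_sparse_grad.value_[k] and p for lr_power_ (the pow terms reduce to sqrt when p = -0.5), the per-element update is:

\[
\begin{aligned}
\mathit{accum}' &= \mathit{accum} + g^{2},\\
\mathit{linear}' &= \mathit{linear} + g - \frac{(\mathit{accum}')^{-p} - \mathit{accum}^{-p}}{\mathit{lr}} \cdot \mathit{var},\\
\mathit{var}' &= \begin{cases}
\dfrac{\mathrm{Sign}(\mathit{linear}') \cdot l_1 - \mathit{linear}'}{(\mathit{accum}')^{-p}/\mathit{lr} + 2\,l_2} & \text{if } |\mathit{linear}'| > l_1,\\
0 & \text{otherwise,}
\end{cases}
\end{aligned}
\]

after which accum is overwritten with accum'.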
@@ -0,0 +1,95 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vector>
#include "common/common_test.h"
#include "kernel/common_utils.h"

namespace mindspore {
namespace kernel {
class CommonUtilTest : public UT::Common {
 public:
  CommonUtilTest() = default;
};

TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest1) {
  // The indices are a vector and the grad is a tensor with shape (6, 2)
  /* 0
   * 0
   * 1
   * 1
   * 0
   * 3
   */
  std::vector<int> indices{0, 0, 1, 1, 0, 3};
  /* 0 1
   * 2 3
   * 4 5
   * 6 7
   * 8 9
   * 10 11
   */
  std::vector<float> grad;
  for (int i = 0; i < 6 * 2; i++) {
    grad.push_back(i);
  }
  std::vector<int> unique_indices(3);
  std::vector<float> summed_grad(6);
  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
  EXPECT_EQ(unique_grad.indices_size_, 3);
  EXPECT_EQ(unique_indices, std::vector<int>({0, 1, 3}));
  /* 10 13
   * 10 12
   * 10 11
   */
  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12, 10, 11}));
}

TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest2) {
  // The indices are a vector and the grad is a tensor with shape (6, 2);
  // index 6 is out of range for first_dim = 6 and is therefore dropped.
  /* 0
   * 0
   * 1
   * 1
   * 0
   * 6
   */
  std::vector<int> indices{0, 0, 1, 1, 0, 6};
  /* 0 1
   * 2 3
   * 4 5
   * 6 7
   * 8 9
   * 10 11
   */
  std::vector<float> grad;
  for (int i = 0; i < 6 * 2; i++) {
    grad.push_back(i);
  }
  std::vector<int> unique_indices(2);
  std::vector<float> summed_grad(4);
  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
  EXPECT_EQ(unique_grad.indices_size_, 2);
  EXPECT_EQ(unique_indices, std::vector<int>({0, 1}));
  /* 10 13
   * 10 12
   */
  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12}));
}
}  // namespace kernel
}  // namespace mindspore