Browse Source

sync cpu op review on master

tags/v1.6.0
zuochuanyong 4 years ago
parent
commit
0e882f44cc
73 changed files with 754 additions and 699 deletions
  1. +13
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/elu_grad_cpu_kernel.cc
  2. +2
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/elu_grad_cpu_kernel.h
  3. +20
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc
  4. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h
  5. +24
    -27
      mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc
  6. +5
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h
  7. +13
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc
  8. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h
  9. +13
    -16
      mindspore/ccsrc/backend/kernel_compiler/cpu/expm1_cpu_kernel.cc
  10. +2
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/expm1_cpu_kernel.h
  11. +29
    -25
      mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc
  12. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h
  13. +11
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_cpu_kernel.cc
  14. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_cpu_kernel.h
  15. +11
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.cc
  16. +6
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.h
  17. +18
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/gathernd_cpu_kernel.cc
  18. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/gathernd_cpu_kernel.h
  19. +20
    -19
      mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_cpu_kernel.cc
  20. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_cpu_kernel.h
  21. +20
    -19
      mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_grad_cpu_kernel.cc
  22. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_grad_cpu_kernel.h
  23. +19
    -19
      mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_cpu_kernel.cc
  24. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_cpu_kernel.h
  25. +22
    -20
      mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_grad_cpu_kernel.cc
  26. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_grad_cpu_kernel.h
  27. +15
    -18
      mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.cc
  28. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.h
  29. +14
    -17
      mindspore/ccsrc/backend/kernel_compiler/cpu/isnan_cpu_kernel.cc
  30. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/isnan_cpu_kernel.h
  31. +10
    -14
      mindspore/ccsrc/backend/kernel_compiler/cpu/l2loss_cpu_kernel.cc
  32. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/l2loss_cpu_kernel.h
  33. +11
    -13
      mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc
  34. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h
  35. +18
    -21
      mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc
  36. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h
  37. +25
    -18
      mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.cc
  38. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.h
  39. +16
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/map_uniform_cpu_kernel.cc
  40. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/map_uniform_cpu_kernel.h
  41. +20
    -33
      mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_cpu_kernel.cc
  42. +11
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_cpu_kernel.h
  43. +10
    -13
      mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_grad_cpu_kernel.cc
  44. +7
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_grad_cpu_kernel.h
  45. +20
    -33
      mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_cpu_kernel.cc
  46. +11
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_cpu_kernel.h
  47. +13
    -18
      mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_grad_cpu_kernel.cc
  48. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_grad_cpu_kernel.h
  49. +14
    -17
      mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc
  50. +8
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h
  51. +35
    -45
      mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc
  52. +11
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h
  53. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.cc
  54. +7
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc
  55. +2
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h
  56. +4
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.cc
  57. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.h
  58. +10
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc
  59. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h
  60. +19
    -9
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc
  61. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h
  62. +19
    -10
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
  63. +3
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h
  64. +13
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc
  65. +6
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h
  66. +10
    -22
      mindspore/ccsrc/backend/kernel_compiler/cpu/pack_cpu_kernel.cc
  67. +12
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/pack_cpu_kernel.h
  68. +16
    -22
      mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc
  69. +7
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.h
  70. +17
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/range_cpu_kernel.cc
  71. +6
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/range_cpu_kernel.h
  72. +25
    -17
      mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc
  73. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h

+ 13
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/elu_grad_cpu_kernel.cc View File

@@ -13,16 +13,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/elu_grad_cpu_kernel.h"
#include <cmath>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/elu_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kEleGradInputsNum = 2;
constexpr size_t kEleGradOutputsNum = 1;
} // namespace

void EluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (dtype_ != AnfAlgo::GetInputDeviceDataType(kernel_node, 1)) {
MS_LOG(EXCEPTION) << "Input0 and input1 must have the same data type";
@@ -31,6 +38,8 @@ void EluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool EluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEleGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEleGradOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat) {
LaunchKernel<float>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat16) {
@@ -44,9 +53,9 @@ bool EluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, con
template <typename T>
void EluGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) const {
T *input0 = reinterpret_cast<T *>(inputs[0]->addr);
T *input1 = reinterpret_cast<T *>(inputs[1]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
const auto *input0 = reinterpret_cast<T *>(inputs[0]->addr);
const auto *input1 = reinterpret_cast<T *>(inputs[1]->addr);
auto *output = reinterpret_cast<T *>(outputs[0]->addr);

size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto task = [input0, input1, output](const size_t start, const size_t end) {


+ 2
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/elu_grad_cpu_kernel.h View File

@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELU_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELU_GRAD_CPU_KERNEL_H_

#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"


+ 20
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,18 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thread>
#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include <thread>
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_interface.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kEmbeddingLookupCommGradInputsNum = 1;
constexpr size_t kEmbeddingLookupCommGradOutputsNum = 1;
} // namespace

void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
split_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "split_num");
MS_LOG(INFO) << "split_num: " << split_num_;
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (split_num_ == 0) {
MS_LOG(EXCEPTION) << "The split_num_ must be larger than 0.";
}
if (input_shape.size() < 1) {
MS_LOG(EXCEPTION) << "The size of input's shape must be at least 1.";
}
if (input_shape[0] % split_num_ != 0) {
MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num.";
}
@@ -33,14 +46,16 @@ void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookupCommGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookupCommGradOutputsNum, kernel_name_);
#if defined(_WIN32) || defined(_WIN64)
auto start_time = std::chrono::steady_clock::now();
#else
struct timeval start_time, end_time;
(void)gettimeofday(&start_time, nullptr);
#endif
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t input_size = inputs[0]->size;
size_t output_size = outputs[0]->size;
MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size;
@@ -67,12 +82,5 @@ bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressP
#endif
return true;
}

void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1.";
}
}
} // namespace kernel
} // namespace mindspore

+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -33,7 +35,6 @@ class EmbeddingLookUpCommGradCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
int64_t split_num_;
};



+ 24
- 27
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
#include <thread>
#include <string>
#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
#include "common/thread_pool.h"
@@ -23,6 +24,11 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kBlockSize = 10000;
constexpr size_t kEmbeddingLookupInputsNum = 2;
constexpr size_t kEmbeddingLookupOutputsNum = 1;
constexpr size_t kEmbeddingLookupInputParamsMaxDim = 2;

template <typename T>
void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens,
size_t outer_dim_size, T offset, size_t first_dim_size) {
@@ -48,11 +54,13 @@ void LookUpTableTask(const float *input_addr, const T *indices_addr, float *outp
} // namespace

void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
node_wpt_ = kernel_node;
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.empty()) {
MS_LOG(EXCEPTION) << "Param must be at least 1D";
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.empty() || input_shape.size() > kEmbeddingLookupInputParamsMaxDim) {
MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel support 1-" << kEmbeddingLookupInputParamsMaxDim
<< "D input tensor, but got " << input_shape.size() << "D.";
}
first_dim_size_ = input_shape[0];
outer_dim_size_ = 1;
@@ -74,11 +82,11 @@ template <typename T>
void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
if (!node_wpt_.expired()) {
auto node_ = node_wpt_.lock();
if (!node_) {
auto node = node_wpt_.lock();
if (!node) {
MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
}
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0);
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
if (input_shape.empty()) {
MS_LOG(EXCEPTION) << "Param must be at least 1D";
}
@@ -89,15 +97,15 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
}

indices_lens_ = 1;
std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);
std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
for (const auto &shape : indices_shape) {
indices_lens_ *= shape;
}
}
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto indices_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t thread_num = indices_lens_ / 10000 + 1;
const auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr);
const auto *indices_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto *output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t thread_num = indices_lens_ / kBlockSize + 1;
auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
thread_num = thread_num > max_thread_num ? max_thread_num : thread_num;
std::vector<common::Task> tasks;
@@ -127,6 +135,8 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookupInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookupOutputsNum, kernel_name_);
if (indices_data_type_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else {
@@ -134,18 +144,5 @@ bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
}
return true;
}

void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size()
<< ", but EmbeddingLookUpCPUKernel only support 4d or lower.";
}

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2.";
}
}
} // namespace kernel
} // namespace mindspore

+ 5
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -31,11 +33,11 @@ class EmbeddingLookUpCPUKernel : public CPUKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
template <typename T>
void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);

protected:
void CheckParam(const CNodePtr &kernel_node);
int64_t offset_{0};
size_t indices_lens_{1};
size_t first_dim_size_{1};


+ 13
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,18 +13,26 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void EqualCountCPUKernel::InitKernel(const CNodePtr &) {}
namespace {
constexpr size_t kEqualCountInputsNum = 2;
constexpr size_t kEqualCountOutputsNum = 1;
} // namespace

void EqualCountCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
}

bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Input or output empty!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEqualCountInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEqualCountOutputsNum, kernel_name_);
if (inputs[0]->size != inputs[1]->size) {
MS_LOG(EXCEPTION) << "The size of input0 must be equal to the size of input1!";
}


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EQUAL_COUNT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EQUAL_COUNT_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"


+ 13
- 16
mindspore/ccsrc/backend/kernel_compiler/cpu/expm1_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,28 +20,27 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kExpm1InputsNum = 1;
constexpr size_t kExpm1OutputsNum = 1;
} // namespace

void Expm1CPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Expm1CPUKernel needs 1 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Expm1CPUKernel needs 1 output.";
}

kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (input_dtype_ != kNumberTypeFloat16 && input_dtype_ != kNumberTypeFloat32 && input_dtype_ != kNumberTypeFloat) {
if (input_dtype_ != kNumberTypeFloat16 && input_dtype_ != kNumberTypeFloat32) {
MS_LOG(EXCEPTION) << "Unsupported input type found.";
}
}

bool Expm1CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kExpm1InputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kExpm1OutputsNum, kernel_name_);
if (input_dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (input_dtype_ == kNumberTypeFloat32 || input_dtype_ == kNumberTypeFloat) {
} else if (input_dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only support float, half, but actual data type is " << TypeIdLabel(input_dtype_);
@@ -52,11 +51,9 @@ bool Expm1CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const
template <typename T>
void Expm1CPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
T *input = reinterpret_cast<T *>(inputs[0]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);

const auto *input = reinterpret_cast<T *>(inputs[0]->addr);
auto *output = reinterpret_cast<T *>(outputs[0]->addr);
size_t elem_num = inputs[0]->size / sizeof(T);

for (size_t i = 0; i < elem_num; i++) {
output[i] = exp(input[i]) - T(1);
}


+ 2
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/expm1_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EXPM1_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EXPM1_CPU_KERNEL_H_

@@ -45,8 +46,6 @@ MS_REG_CPU_KERNEL(Expm1, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutput

MS_REG_CPU_KERNEL(Expm1, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
Expm1CPUKernel);

MS_REG_CPU_KERNEL(Expm1, KernelAttr().AddInputAttr(kNumberTypeFloat).AddOutputAttr(kNumberTypeFloat32), Expm1CPUKernel);
} // namespace kernel
} // namespace mindspore



+ 29
- 25
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "nnacl/gather_parameter.h"
@@ -21,12 +22,23 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kGatherInputsNum = 2;
constexpr size_t kGatherOutputsNum = 1;
constexpr size_t kGatherInputParamsMaxDim = 4;
} // namespace

template <typename T>
void GatherV2CPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (input_shape_.size() > kGatherInputParamsMaxDim) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape_.size() << ", but GatherV2CPUKernel only support "
<< kGatherInputParamsMaxDim << "D or lower.";
}
if (!is_dynamic_shape_) {
axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
}
@@ -36,9 +48,11 @@ template <typename T>
bool GatherV2CPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_tensor = reinterpret_cast<int8_t *>(inputs[0]->addr);
indices_data_ = reinterpret_cast<int32_t *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<int8_t *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherOutputsNum, kernel_name_);
const auto *input_tensor = reinterpret_cast<int8_t *>(inputs[0]->addr);
const auto *indices_data = reinterpret_cast<int32_t *>(inputs[1]->addr);
auto *output_addr = reinterpret_cast<int8_t *>(outputs[0]->addr);
if (is_dynamic_shape_) {
axis_ = reinterpret_cast<int64_t *>(inputs[2]->addr)[0];
}
@@ -51,13 +65,14 @@ bool GatherV2CPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
axis_ = axis_ + dims;
}

int max_thread_num = static_cast<int>(common::ThreadPool::GetInstance().GetSyncRunThreadNum());
ParallelRun(input_tensor, output_addr, max_thread_num);
int max_thread_num = SizeToInt(common::ThreadPool::GetInstance().GetSyncRunThreadNum());
ParallelRun(input_tensor, indices_data, output_addr, max_thread_num);
return true;
}

template <typename T>
void GatherV2CPUKernel<T>::ParallelRun(int8_t *input_addr, int8_t *output_addr, int thread_num) {
void GatherV2CPUKernel<T>::ParallelRun(const int8_t *input_addr, const int *indices_data, int8_t *output_addr,
int thread_num) {
size_t outer_size = 1, inner_size = 1;
auto axis = static_cast<size_t>(axis_);
for (size_t i = 0; i < axis; ++i) {
@@ -76,12 +91,14 @@ void GatherV2CPUKernel<T>::ParallelRun(int8_t *input_addr, int8_t *output_addr,
int thread_index = 0;
while (thread_index < thread_num) {
int count = SizeToInt(MSMIN(stride, outer_size - stride * IntToSize(thread_index)));
if (count <= 0) break;
if (count <= 0) {
break;
}
auto thread_stride = static_cast<size_t>(stride * thread_index);
int8_t *in = input_addr + thread_stride * limit * inner_size * sizeof(T);
const int8_t *in = input_addr + thread_stride * limit * inner_size * sizeof(T);
int8_t *out = output_addr + thread_stride * indices_element_size * inner_size * sizeof(T);
auto block = [this, in, count, inner_size, limit, indices_element_size, out, thread_index]() {
int ret = Gather(in, count, inner_size, limit, indices_data_, indices_element_size, out, sizeof(T));
auto block = [this, in, indices_data, count, inner_size, limit, indices_element_size, out, thread_index]() {
int ret = Gather(in, count, inner_size, limit, indices_data, indices_element_size, out, sizeof(T));
if (ret != 0) {
MS_LOG(ERROR) << "GatherRun error task_id[" << thread_index << "] error_code[" << ret << "]";
return common::FAIL;
@@ -95,18 +112,5 @@ void GatherV2CPUKernel<T>::ParallelRun(int8_t *input_addr, int8_t *output_addr,
MS_LOG(EXCEPTION) << "SyncRun error!";
}
}

template <typename T>
void GatherV2CPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num == 3) {
is_dynamic_shape_ = true;
MS_LOG(DEBUG) << " GatherV2CPUKernel running in Dynamic Mode.";
} else if (input_num == 2) {
MS_LOG(DEBUG) << " GatherV2CPUKernel running in Normal Mode.";
} else {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2.";
}
}
} // namespace kernel
} // namespace mindspore

+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -35,12 +37,10 @@ class GatherV2CPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
void ParallelRun(int8_t *input_addr, int8_t *output_addr, int thread_num);
void ParallelRun(const int8_t *input_addr, const int *indices_data, int8_t *output_addr, int thread_num);
std::vector<size_t> input_shape_;
std::vector<size_t> indices_shape_;
std::vector<size_t> output_shape_;
int *indices_data_ = nullptr;
int64_t axis_{0};
bool is_dynamic_shape_{false};
};


+ 11
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,6 +19,9 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kGatherDInputsNum = 3;
constexpr size_t kGatherDOutputsNum = 1;

size_t get_element_num(const std::vector<size_t> &shape) {
size_t size = 1;
for (size_t i = 0; i < shape.size(); i++) {
@@ -63,6 +66,8 @@ void CopyTask(size_t cur, std::vector<size_t> *pos, T *input, const I *index, co

template <typename T, typename I>
void GatherDCPUKernel<T, I>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
if (input_shape_.size() != index_shape_.size()) {
@@ -76,6 +81,8 @@ template <typename T, typename I>
bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherDInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherDOutputsNum, kernel_name_);
size_t input_size = get_element_num(input_shape_) * sizeof(T);
size_t index_size = get_element_num(index_shape_) * sizeof(I);
size_t dim_size = sizeof(int);
@@ -83,17 +90,15 @@ bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &input
if (inputs[0]->size != input_size || inputs[1]->size != dim_size || inputs[2]->size != index_size ||
outputs[0]->size != output_size) {
MS_LOG(EXCEPTION) << "invalid input or output data size!";
return false;
}
auto input = reinterpret_cast<T *>(inputs[0]->addr);
auto dim = reinterpret_cast<int32_t *>(inputs[1]->addr);
auto index = reinterpret_cast<I *>(inputs[2]->addr);
auto *input = reinterpret_cast<T *>(inputs[0]->addr);
auto *dim = reinterpret_cast<int32_t *>(inputs[1]->addr);
auto *index = reinterpret_cast<I *>(inputs[2]->addr);
auto output = reinterpret_cast<T *>(outputs[0]->addr);
int32_t input_rank = SizeToInt(input_shape_.size());
if (dim[0] >= input_rank || dim[0] < -input_rank) {
MS_LOG(EXCEPTION) << "The value of 'dim' should be in [" << -input_rank << ", " << input_rank
<< "], but got: " << dim[0];
return false;
}
if (dim[0] < 0) {
dim[0] = static_cast<int>(dim[0] + input_rank);
@@ -105,7 +110,6 @@ bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &input
if (index[i] >= max_index || index[i] < -max_index) {
MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index
<< "], but got: " << index[i];
return false;
}
if (index[i] < 0) {
index[i] = max_index + index[i];


+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERD_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -52,4 +52,4 @@ MS_REG_CPU_KERNEL_T_S(GatherD, KernelAttr(), GatherDCPUKernel, bool, int64_t);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERD_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_CPU_KERNEL_H_

+ 11
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kGatherDGradInputsNum = 2;
constexpr size_t kGatherDGradOutputsNum = 1;

size_t get_element_num(const std::vector<size_t> &shape) {
size_t size = 1;
for (size_t i = 0; i < shape.size(); i++) {
@@ -59,6 +63,8 @@ void GatherDGradCopyTask(size_t cur, std::vector<size_t> *pos, T *input, I *inde

template <typename I, typename T>
void GatherDGradCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (input_shape_ != index_shape_) {
@@ -72,25 +78,23 @@ template <typename I, typename T>
bool GatherDGradCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherDGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherDGradOutputsNum, kernel_name_);
size_t input_size = get_element_num(input_shape_) * sizeof(T);
size_t index_size = get_element_num(index_shape_) * sizeof(I);
size_t output_size = get_element_num(output_shape_) * sizeof(T);
if (inputs[0]->size != index_size || inputs[1]->size != input_size || outputs[0]->size != output_size) {
MS_LOG(EXCEPTION) << "invalid input or output data size!";
return false;
}

auto index = reinterpret_cast<I *>(inputs[0]->addr);
auto input = reinterpret_cast<T *>(inputs[1]->addr);
auto *index = reinterpret_cast<I *>(inputs[0]->addr);
auto *input = reinterpret_cast<T *>(inputs[1]->addr);
auto out = reinterpret_cast<T *>(outputs[0]->addr);

int output_rank = SizeToInt(output_shape_.size());
if (axis_ >= output_rank || axis_ < -output_rank) {
MS_LOG(EXCEPTION) << "The value of 'axis_' should be in [" << -output_rank << ", " << output_rank
<< "], but got: " << axis_;
return false;
}

if (axis_ < 0) {
axis_ = axis_ + SizeToInt(output_shape_.size());
}
@@ -102,7 +106,6 @@ bool GatherDGradCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &i
if (index[i] >= max_index || index[i] < -max_index) {
MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index
<< "], but got: " << index[i];
return false;
}
if (index[i] < 0) {
index[i] = max_index + index[i];


+ 6
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERDGRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERDGRAD_CPU_KERNEL_H_

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_GRAD_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -37,7 +39,7 @@ class GatherDGradCPUKernel : public CPUKernel {
std::vector<size_t> input_shape_;
std::vector<size_t> index_shape_;
std::vector<size_t> output_shape_;
int32_t axis_;
int32_t axis_{1};
};

MS_REG_CPU_KERNEL_T_S(GatherDGrad, KernelAttr(), GatherDGradCPUKernel, int32_t, int32_t);


+ 18
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/gathernd_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,14 +13,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/gathernd_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#define MAX_INT (((unsigned int)(-1)) >> 1)

namespace mindspore {
namespace kernel {
namespace {
#define MAX_INT (((unsigned int)(-1)) >> 1)

constexpr size_t kGatherNdInputsNum = 2;
constexpr size_t kGatherNdOutputsNum = 1;
} // namespace

template <typename T>
void GatherNdCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
@@ -35,6 +44,9 @@ void GatherNdCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

size_t dim_after_indices = 1;
size_t dim_indices_last = indices_shapes_[indices_shapes_.size() - IntToSize(1)];
if (dim_indices_last == 0) {
MS_LOG(EXCEPTION) << "Value of indices_shapes_[" << indices_shapes_.size() << " - 1] should not be 0";
}
for (size_t i = dim_indices_last; i < input_shapes_.size(); i++) {
dim_after_indices *= input_shapes_[i];
}
@@ -61,8 +73,10 @@ template <typename T>
bool GatherNdCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherNdInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherNdOutputsNum, kernel_name_);
const auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
const auto *indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);

size_t output_dim0 = dims_[0];


+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/gathernd_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERND_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERND_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -56,7 +58,6 @@ MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, uint32_t);
MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, uint64_t);
MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, float);
MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, double);

} // namespace kernel
} // namespace mindspore



+ 20
- 19
mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_cpu_kernel.cc View File

@@ -20,9 +20,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kHSigmoidInputsNum = 1;
constexpr size_t kHSigmoidOutputsNum = 1;
} // namespace

template <typename T>
void HSigmoidCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const uint64_t &d : x_shape_) {
tensor_size_ *= d;
@@ -33,33 +39,28 @@ template <typename T>
bool HSigmoidCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto x = reinterpret_cast<T *>(inputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSigmoidInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSigmoidOutputsNum, kernel_name_);
const auto *x = reinterpret_cast<T *>(inputs[0]->addr);
auto y = reinterpret_cast<T *>(outputs[0]->addr);
auto zero = static_cast<T>(0);
auto one = static_cast<T>(1);
auto three = static_cast<T>(3);
auto six = static_cast<T>(6);

auto task = [&](size_t start, size_t end) {
for (uint64_t i = start; i < end; ++i) {
if (x[i] <= -3) {
y[i] = 0;
} else if (x[i] >= 3) {
y[i] = 1;
if (x[i] + three <= zero) {
y[i] = zero;
} else if (x[i] >= three) {
y[i] = one;
} else {
y[i] = (x[i] + 3) / 6;
y[i] = (x[i] + three) / six;
}
}
};
CPUKernelUtils::ParallelFor(task, tensor_size_);
return true;
}

// Checks the kernel node's I/O arity for HSigmoid, a unary elementwise op:
// exactly 1 input tensor and 1 output tensor are required.
// MS_LOG(EXCEPTION) is fatal, so a mismatch aborts kernel initialization.
template <typename T>
void HSigmoidCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
// Verify input count (the x tensor).
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidCPUKernel needs 1 input.";
}
// Verify output count (the y tensor).
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_cpu_kernel.h View File

@@ -14,8 +14,9 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -36,7 +37,6 @@ class HSigmoidCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
uint64_t tensor_size_ = 1;
};
@@ -52,4 +52,4 @@ MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr(), HSigmoidCPUKernel, int64_t);
MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr(), HSigmoidCPUKernel, float);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_CPU_KERNEL_H_

+ 20
- 19
mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_grad_cpu_kernel.cc View File

@@ -20,9 +20,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kHSigmoidGradInputsNum = 2;
constexpr size_t kHSigmoidGradOutputsNum = 1;
} // namespace

template <typename T>
void HSigmoidGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
for (const uint64_t &d : x_shape_) {
tensor_size_ *= d;
@@ -33,32 +39,27 @@ template <typename T>
bool HSigmoidGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto dy = reinterpret_cast<T *>(inputs[0]->addr);
auto x = reinterpret_cast<T *>(inputs[1]->addr);
auto out = reinterpret_cast<T *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSigmoidGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSigmoidGradOutputsNum, kernel_name_);
const auto *dy = reinterpret_cast<T *>(inputs[0]->addr);
const auto *x = reinterpret_cast<T *>(inputs[1]->addr);
auto *out = reinterpret_cast<T *>(outputs[0]->addr);

auto zero = static_cast<T>(0);
auto three = static_cast<T>(3);
auto six = static_cast<T>(6);

auto task = [&](size_t start, size_t end) {
for (uint64_t i = start; i < end; ++i) {
if (x[i] <= -3 || x[i] >= 3) {
out[i] = 0;
if (x[i] + three <= zero || x[i] >= three) {
out[i] = zero;
} else {
out[i] = dy[i] / 6;
out[i] = dy[i] / six;
}
}
};
CPUKernelUtils::ParallelFor(task, tensor_size_);
return true;
}

// Checks the kernel node's I/O arity for HSigmoidGrad: exactly 2 input
// tensors (upstream gradient dy and forward input x) and 1 output tensor.
// MS_LOG(EXCEPTION) is fatal, so a mismatch aborts kernel initialization.
template <typename T>
void HSigmoidGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
// Verify input count (dy and x).
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGradCPUKernel needs 2 input.";
}
// Verify output count (the gradient w.r.t. x).
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGradCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/hsigmoid_grad_cpu_kernel.h View File

@@ -14,8 +14,9 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_GRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -36,7 +37,6 @@ class HSigmoidGradCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
uint64_t tensor_size_ = 1;
};
@@ -48,4 +48,4 @@ MS_REG_CPU_KERNEL_T(HSigmoidGrad, KernelAttr(), HSigmoidGradCPUKernel, int64_t);
MS_REG_CPU_KERNEL_T(HSigmoidGrad, KernelAttr(), HSigmoidGradCPUKernel, float);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_GRAD_CPU_KERNEL_H_

+ 19
- 19
mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_cpu_kernel.cc View File

@@ -20,9 +20,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kHSwishInputsNum = 1;
constexpr size_t kHSwishOutputsNum = 1;
} // namespace

template <typename T>
void HSwishCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const uint64_t &d : x_shape_) {
tensor_size_ *= d;
@@ -32,33 +38,27 @@ void HSwishCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
bool HSwishCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto x = reinterpret_cast<T *>(inputs[0]->addr);
auto y = reinterpret_cast<T *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSwishInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSwishOutputsNum, kernel_name_);
const auto *x = reinterpret_cast<T *>(inputs[0]->addr);
auto *y = reinterpret_cast<T *>(outputs[0]->addr);
auto zero = static_cast<T>(0);
auto three = static_cast<T>(3);
auto six = static_cast<T>(6);

auto task = [&](size_t start, size_t end) {
for (uint64_t i = start; i < end; ++i) {
if (x[i] <= -3) {
y[i] = 0;
} else if (x[i] >= 3) {
if (x[i] + three <= zero) {
y[i] = zero;
} else if (x[i] >= three) {
y[i] = x[i];
} else {
y[i] = x[i] * (x[i] + 3) / 6;
y[i] = x[i] * (x[i] + three) / six;
}
}
};
CPUKernelUtils::ParallelFor(task, tensor_size_);
return true;
}

// Checks the kernel node's I/O arity for HSwish, a unary elementwise op:
// exactly 1 input tensor and 1 output tensor are required.
// MS_LOG(EXCEPTION) is fatal, so a mismatch aborts kernel initialization.
template <typename T>
void HSwishCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
// Verify input count (the x tensor).
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishCPUKernel needs 1 input.";
}
// Verify output count (the y tensor).
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwishCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_cpu_kernel.h View File

@@ -14,8 +14,9 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -36,7 +37,6 @@ class HSwishCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
uint64_t tensor_size_ = 1;
};
@@ -48,4 +48,4 @@ MS_REG_CPU_KERNEL_T(HSwish, KernelAttr(), HSwishCPUKernel, int64_t);
MS_REG_CPU_KERNEL_T(HSwish, KernelAttr(), HSwishCPUKernel, float);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_CPU_KERNEL_H_

+ 22
- 20
mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_grad_cpu_kernel.cc View File

@@ -20,9 +20,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kHSwishGradInputsNum = 2;
constexpr size_t kHSwishGradOutputsNum = 1;
} // namespace

template <typename T>
void HSwishGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
for (const uint64_t &d : x_shape_) {
tensor_size_ *= d;
@@ -33,34 +39,30 @@ template <typename T>
bool HSwishGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto dy = reinterpret_cast<T *>(inputs[0]->addr);
auto x = reinterpret_cast<T *>(inputs[1]->addr);
auto out = reinterpret_cast<T *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSwishGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSwishGradOutputsNum, kernel_name_);
const auto *dy = reinterpret_cast<T *>(inputs[0]->addr);
const auto *x = reinterpret_cast<T *>(inputs[1]->addr);
auto *out = reinterpret_cast<T *>(outputs[0]->addr);

auto zero = static_cast<T>(0);
auto two = static_cast<T>(2);
auto three = static_cast<T>(3);
auto six = static_cast<T>(6);

auto task = [&](size_t start, size_t end) {
for (uint64_t i = start; i < end; ++i) {
if (x[i] <= -3) {
out[i] = 0;
} else if (x[i] >= 3) {
if (x[i] + three <= zero) {
out[i] = zero;
} else if (x[i] >= three) {
out[i] = dy[i];
} else {
out[i] = dy[i] * (2 * x[i] + 3) / 6;
out[i] = dy[i] * (two * x[i] + three) / six;
}
}
};
CPUKernelUtils::ParallelFor(task, tensor_size_);
return true;
}

// Checks the kernel node's I/O arity for HSwishGrad: exactly 2 input
// tensors (upstream gradient dy and forward input x) and 1 output tensor.
// MS_LOG(EXCEPTION) is fatal, so a mismatch aborts kernel initialization.
template <typename T>
void HSwishGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
// Verify input count (dy and x).
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishGradCPUKernel needs 2 input.";
}
// Verify output count (the gradient w.r.t. x).
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwishGradCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/hswish_grad_cpu_kernel.h View File

@@ -14,8 +14,9 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_GRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -36,7 +37,6 @@ class HSwishGradCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
uint64_t tensor_size_ = 1;
};
@@ -48,4 +48,4 @@ MS_REG_CPU_KERNEL_T(HSwishGrad, KernelAttr(), HSwishGradCPUKernel, int64_t);
MS_REG_CPU_KERNEL_T(HSwishGrad, KernelAttr(), HSwishGradCPUKernel, float);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_GRAD_CPU_KERNEL_H_

+ 15
- 18
mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,18 +21,15 @@

namespace mindspore {
namespace kernel {
void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernelNode) {
MS_EXCEPTION_IF_NULL(kernelNode);
size_t input_num = AnfAlgo::GetInputTensorNum(kernelNode);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but IsFiniteCPUKernel needs 1 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernelNode);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but IsFiniteCPUKernel needs 1 output.";
}
namespace {
constexpr size_t kIsFiniteInputsNum = 1;
constexpr size_t kIsFiniteOutputsNum = 1;
} // namespace

input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernelNode, 0);
void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (dtype_map_.find(input_dtype_) == dtype_map_.end()) {
MS_LOG(EXCEPTION) << "Unsupported input type found.";
}
@@ -40,24 +37,24 @@ void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernelNode) {

bool IsFiniteCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kIsFiniteInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kIsFiniteOutputsNum, kernel_name_);
if (input_dtype_ == kNumberTypeFloat16) {
LaunchKernelFloat16(inputs, outputs);
} else if (input_dtype_ == kNumberTypeFloat32 || input_dtype_ == kNumberTypeFloat) {
} else if (input_dtype_ == kNumberTypeFloat32) {
LaunchKernelFloat<float>(inputs, outputs);
} else if (input_dtype_ == kNumberTypeFloat64) {
LaunchKernelFloat<double>(inputs, outputs);
} else if (dtype_map_.find(input_dtype_) != dtype_map_.end()) {
LaunchKernelOther(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only support bool, int, uint, float, but actual data type is " << TypeIdLabel(input_dtype_);
LaunchKernelOther(inputs, outputs);
}
return true;
}

void IsFiniteCPUKernel::LaunchKernelFloat16(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) const {
float16 *input = reinterpret_cast<float16 *>(inputs[0]->addr);
bool *output = reinterpret_cast<bool *>(outputs[0]->addr);
const auto *input = reinterpret_cast<float16 *>(inputs[0]->addr);
auto *output = reinterpret_cast<bool *>(outputs[0]->addr);

size_t elem_num = inputs[0]->size / sizeof(float16);



+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISFINITE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISFINITE_CPU_KERNEL_H_



+ 14
- 17
mindspore/ccsrc/backend/kernel_compiler/cpu/isnan_cpu_kernel.cc View File

@@ -21,18 +21,15 @@

namespace mindspore {
namespace kernel {
void IsNanCPUKernel::InitKernel(const CNodePtr &kernelNode) {
MS_EXCEPTION_IF_NULL(kernelNode);
size_t input_num = AnfAlgo::GetInputTensorNum(kernelNode);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but IsNanCPUKernel needs 1 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernelNode);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but IsNanCPUKernel needs 1 output.";
}
namespace {
constexpr size_t kIsNanInputsNum = 1;
constexpr size_t kIsNanOutputsNum = 1;
} // namespace

input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernelNode, 0);
void IsNanCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (dtype_map_.find(input_dtype_) == dtype_map_.end()) {
MS_LOG(EXCEPTION) << "Unsupported input type found.";
}
@@ -40,24 +37,24 @@ void IsNanCPUKernel::InitKernel(const CNodePtr &kernelNode) {

bool IsNanCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kIsNanInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kIsNanOutputsNum, kernel_name_);
if (input_dtype_ == kNumberTypeFloat16) {
LaunchKernelFloat16(inputs, outputs);
} else if (input_dtype_ == kNumberTypeFloat32 || input_dtype_ == kNumberTypeFloat) {
} else if (input_dtype_ == kNumberTypeFloat32) {
LaunchKernelFloat<float>(inputs, outputs);
} else if (input_dtype_ == kNumberTypeFloat64) {
LaunchKernelFloat<double>(inputs, outputs);
} else if (dtype_map_.find(input_dtype_) != dtype_map_.end()) {
LaunchKernelOther(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only support bool, int, uint, float, but actual data type is " << TypeIdLabel(input_dtype_);
LaunchKernelOther(inputs, outputs);
}
return true;
}

void IsNanCPUKernel::LaunchKernelFloat16(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
float16 *input = reinterpret_cast<float16 *>(inputs[0]->addr);
bool *output = reinterpret_cast<bool *>(outputs[0]->addr);
const auto *input = reinterpret_cast<float16 *>(inputs[0]->addr);
auto *output = reinterpret_cast<bool *>(outputs[0]->addr);

size_t elem_num = inputs[0]->size / sizeof(float16);



+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/isnan_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISNAN_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISNAN_CPU_KERNEL_H_



+ 10
- 14
mindspore/ccsrc/backend/kernel_compiler/cpu/l2loss_cpu_kernel.cc View File

@@ -19,9 +19,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kL2LossInputsNum = 1;
constexpr size_t kL2LossOutputsNum = 1;
} // namespace

template <typename T>
void L2LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const size_t &d : x_shape) {
tensor_size_ *= d;
@@ -31,26 +37,16 @@ void L2LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
bool L2LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kL2LossInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kL2LossOutputsNum, kernel_name_);
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto result_addr = reinterpret_cast<T *>(outputs[0]->addr);
*result_addr = (T)0;
*result_addr = static_cast<T>(0);
for (size_t i = 0; i < tensor_size_; i++) {
*result_addr += input_addr[i] * input_addr[i];
}
*result_addr = *result_addr / 2;
return true;
}

template <typename T>
void L2LossCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but L2LossCPUKernel needs 1 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but L2LossCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/l2loss_cpu_kernel.h View File

@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_L2_LOSS_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_L2_LOSS_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -36,8 +37,8 @@ class L2LossCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
size_t tensor_size_{1};
TypeId dtype_{kTypeUnknown};
size_t tensor_size_ = 1;
};

MS_REG_CPU_KERNEL_T(L2Loss, KernelAttr(), L2LossCPUKernel, float16);


+ 11
- 13
mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/cpu/cpu_device_address.h"
@@ -20,8 +21,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kLayerNormInputsNum = 3;
constexpr size_t kLayerNormOutputsNum = 3;
} // namespace

void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto begin_norm_axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "begin_norm_axis");
@@ -48,12 +55,14 @@ void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool LayerNormCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kLayerNormInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kLayerNormOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) {
LaunchKernel<float>(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Input dtype only support float16, float32, float64!";
MS_LOG(EXCEPTION) << "Input dtype only support float16, float32, float64";
}
return true;
}
@@ -111,16 +120,5 @@ void LayerNormCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con
}
(void)common::ThreadPool::GetInstance().SyncRun(tasks);
}

void LayerNormCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "LayerNormCPUKernel needs 3 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 3) {
MS_LOG(EXCEPTION) << "LayerNormCPUKernel expects 3 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h View File

@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,11 +35,10 @@ class LayerNormCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
float eps_{1e-12};
size_t block_num_{1};


+ 18
- 21
mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc View File

@@ -21,8 +21,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kLayerNormGradInputsNum = 5;
constexpr size_t kLayerNormGradOutputsNum = 3;
} // namespace

void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto begin_norm_axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "begin_norm_axis");
@@ -53,6 +59,8 @@ void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool LayerNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kLayerNormGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kLayerNormGradOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) {
@@ -66,14 +74,14 @@ bool LayerNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input
template <typename T>
void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto x = reinterpret_cast<T *>(inputs[0]->addr);
auto dy = reinterpret_cast<T *>(inputs[1]->addr);
auto var = reinterpret_cast<T *>(inputs[2]->addr);
auto mean = reinterpret_cast<T *>(inputs[3]->addr);
auto gamma = reinterpret_cast<T *>(inputs[4]->addr);
auto dx = reinterpret_cast<T *>(outputs[0]->addr);
auto dg = reinterpret_cast<T *>(outputs[1]->addr);
auto db = reinterpret_cast<T *>(outputs[2]->addr);
auto *x = reinterpret_cast<T *>(inputs[0]->addr);
auto *dy = reinterpret_cast<T *>(inputs[1]->addr);
auto *var = reinterpret_cast<T *>(inputs[2]->addr);
auto *mean = reinterpret_cast<T *>(inputs[3]->addr);
auto *gamma = reinterpret_cast<T *>(inputs[4]->addr);
auto *dx = reinterpret_cast<T *>(outputs[0]->addr);
auto *dg = reinterpret_cast<T *>(outputs[1]->addr);
auto *db = reinterpret_cast<T *>(outputs[2]->addr);
size_t thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
auto thread_num1 = param_num_ < thread_num ? param_num_ : thread_num;
std::vector<common::Task> tasks1;
@@ -121,7 +129,7 @@ void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
auto norm_shift = static_cast<int>(j / block_size_);
auto var_sqrt = (T)std::pow(static_cast<double>(var[norm_shift]) + eps_, -0.5);
auto dx1 = dy[j] * gamma[param_shift] * var_sqrt;
auto dx2 = sum1 * (T)2.0 / block_size_ * (x[j] - mean[norm_shift]);
auto dx2 = sum1 * (T)2.0 / (T)(block_size_) * (x[j] - mean[norm_shift]);
auto dx3 = ((T)(-1.0) * var_sqrt * sum2 + ((T)1.0 / (T)block_size_) * sum1 * sum3) * ((T)1.0 / (T)block_size_);
dx[j] = dx1 + dx2 + dx3;
}
@@ -144,16 +152,5 @@ void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
}
(void)common::ThreadPool::GetInstance().SyncRun(tasks2);
}

void LayerNormGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 5) {
MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel needs 5 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 3) {
MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel expects 3 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h View File

@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,11 +35,10 @@ class LayerNormGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
float eps_{1e-12};
size_t block_num_{1};


+ 25
- 18
mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -23,12 +23,17 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMapCacheIdxInputsNum = 5;
constexpr size_t kMapCacheIdxOutputsNum = 4;
} // namespace

template <typename T>
int Compress(HashmapEntry<T> *entry_p, const size_t &length, T entry) {
T i = (entry + 1) % length;
int64_t off = 1;
T i = (entry + 1) % static_cast<T>(length);
T off = 1;
int compress_count = 0;
for (; !entry_p[i].IsEmpty(); i = (i + 1) % length, off++) {
for (; !entry_p[i].IsEmpty(); i = (i + 1) % static_cast<T>(length), off++) {
if (entry_p[i].tag_ > off) {
entry_p[entry].key_ = entry_p[i].key_;
entry_p[entry].value_ = entry_p[i].value_;
@@ -43,28 +48,29 @@ int Compress(HashmapEntry<T> *entry_p, const size_t &length, T entry) {
return compress_count;
}

void UpdateShape(size_t miss_count, const CNodePtr &node_) {
void UpdateShape(size_t miss_count, const CNodePtr &node) {
std::vector<size_t> out_shape;
(void)out_shape.emplace_back(miss_count);
size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
size_t output_num = AnfAlgo::GetOutputTensorNum(node);
std::vector<TypeId> dtypes(output_num);
for (size_t i = 0; i < output_num; i++) {
dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node, i);
}
AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node_, 0), out_shape, out_shape, out_shape},
node_.get());
AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node, 0), out_shape, out_shape, out_shape},
node.get());
}

void MapCacheIdxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
node_wpt_ = kernel_node;
auto hashmap_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (hashmap_shape.size() != 2) {
MS_LOG(EXCEPTION) << "Dimension of HashMap must be 2, (n, 4)";
}
hashmap_length_ = hashmap_shape[0];
if (hashmap_length_ <= 0) {
MS_LOG(INFO) << "Value of hashmap_length_ must > 0!";
if (hashmap_length_ == 0) {
MS_LOG(EXCEPTION) << "Value of hashmap_length_ must > 0!";
}
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
}
@@ -72,13 +78,14 @@ void MapCacheIdxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool MapCacheIdxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMapCacheIdxInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMapCacheIdxOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
} else {
MS_LOG(ERROR) << "Only support int32, int64";
return false;
MS_LOG(EXCEPTION) << "Only support int32, int64";
}
return true;
}
@@ -86,8 +93,8 @@ bool MapCacheIdxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
template <typename T>
void MapCacheIdxCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
auto node_ = node_wpt_.lock();
auto emb_idx_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);
auto node = node_wpt_.lock();
auto emb_idx_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
batch_size_ = 1;
for (size_t i = 0; i < emb_idx_shape.size(); ++i) {
batch_size_ *= emb_idx_shape[i];
@@ -157,8 +164,8 @@ void MapCacheIdxCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
tag_count++;
}
hashmap[entry].key_ = emb_idx;
hashmap[entry].step_ = SizeToLong(step_[0]);
hashmap[entry].tag_ = SizeToLong(tag_count);
hashmap[entry].step_ = step_[0];
hashmap[entry].tag_ = static_cast<T>(tag_count);
T tmp_entry = (entry + 1) % static_cast<T>(hashmap_length_);
size_t delete_count = 1;
while (hashmap[tmp_entry].IsEmpty() || hashmap[tmp_entry].IsUsing(step_[0])) {
@@ -184,7 +191,7 @@ void MapCacheIdxCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
for (size_t i = 0; i < miss_count; ++i) {
output_cache_idx[miss_idx[i]] = output_swap_cache_idx[i];
}
UpdateShape(miss_count, node_);
UpdateShape(miss_count, node);
}
} // namespace kernel
} // namespace mindspore

+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_CACHE_IDX_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_CACHE_IDX_CPU_KERNEL_H_

@@ -35,10 +36,10 @@ class MapCacheIdxCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);

private:
size_t batch_size_{1};
size_t hashmap_length_{1};
TypeId dtype_{kTypeUnknown};


+ 16
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/map_uniform_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,21 +22,28 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMapUniformInputsNum = 3;
constexpr size_t kMapUniformOutputsNum = 1;
} // namespace

void MapUniformCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
node_wpt_ = kernel_node;
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
}

bool MapUniformCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMapUniformInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMapUniformOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
} else {
MS_LOG(ERROR) << "Only support int32, int64";
return false;
MS_LOG(EXCEPTION) << "Only support int32, int64";
}
return true;
}
@@ -44,11 +51,11 @@ bool MapUniformCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
template <typename T>
void MapUniformCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
auto node_ = node_wpt_.lock();
if (!node_) {
auto node = node_wpt_.lock();
if (!node) {
MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
}
auto input_x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0);
auto input_x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
batch_size_ = 1;
for (size_t i = 0; i < input_x_shape.size(); ++i) {
batch_size_ *= input_x_shape[i];
@@ -58,6 +65,9 @@ void MapUniformCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
auto per_group_size = *reinterpret_cast<T *>(inputs[1]->addr);
auto group_num = *reinterpret_cast<T *>(inputs[2]->addr);
auto output_x = reinterpret_cast<T *>(outputs[0]->addr);
if (group_num <= 0) {
MS_LOG(EXCEPTION) << "Group num should be greater than 0";
}
T max_num = group_num * per_group_size;
for (size_t i = 0; i < batch_size_; ++i) {
output_x[i] = input_x[i] % group_num * per_group_size + input_x[i] / group_num;


+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/map_uniform_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_UNIFORM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_UNIFORM_CPU_KERNEL_H_

@@ -35,10 +36,10 @@ class MapUniformCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);

private:
size_t batch_size_{1};
TypeId dtype_{kTypeUnknown};
CNodeWeakPtr node_wpt_;


+ 20
- 33
mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,9 +19,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMaximumInputsNum = 2;
constexpr size_t kMaximumOutputsNum = 1;
} // namespace

template <typename T>
void MaximumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
input_y_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
@@ -42,18 +48,6 @@ void MaximumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
}
}

template <typename T>
void MaximumCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaximumCPUKernel needs 2 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaximumCPUKernel needs 1 output.";
}
}

template <typename T>
void MaximumCPUKernel<T>::InitInputTensorAndScalar(size_t max_input_shape_size) {
if (max_input_shape_size != output_shape_.size()) {
@@ -77,6 +71,8 @@ void MaximumCPUKernel<T>::InitInputTensors(TypeId input_x_dtype, TypeId input_y_
template <typename T>
bool MaximumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaximumInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaximumOutputsNum, kernel_name_);
T *input_x_ = reinterpret_cast<T *>(inputs[0]->addr);
T *input_y_ = reinterpret_cast<T *>(inputs[1]->addr);
T *output_ = reinterpret_cast<T *>(outputs[0]->addr);
@@ -85,7 +81,7 @@ bool MaximumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
}

template <typename T>
void MaximumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) {
void MaximumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) const {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
@@ -108,7 +104,7 @@ void MaximumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *
}

template <typename T>
bool MaximumCPUKernel<T>::IsBroadcast() {
bool MaximumCPUKernel<T>::IsBroadcast() const {
if (input_x_shape_.size() != input_y_shape_.size()) {
return true;
}
@@ -122,12 +118,12 @@ bool MaximumCPUKernel<T>::IsBroadcast() {

template <typename T>
void MaximumCPUKernel<T>::InitTensorBroadcastShape() {
if (output_shape_.size() > max_dims) {
if (output_shape_.size() > max_dims_) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7";
}
broadcast_input_x_shape_.resize(max_dims, 1);
broadcast_input_y_shape_.resize(max_dims, 1);
broadcast_output_shape_.resize(max_dims, 1);
broadcast_input_x_shape_.resize(max_dims_, 1);
broadcast_input_y_shape_.resize(max_dims_, 1);
broadcast_output_shape_.resize(max_dims_, 1);
for (size_t i = 0; i < output_shape_.size(); i++) {
broadcast_output_shape_[i] = output_shape_[i];
}
@@ -147,7 +143,7 @@ void MaximumCPUKernel<T>::InitTensorBroadcastShape() {

// Broadcast comparison
template <typename T>
size_t MaximumCPUKernel<T>::Index(const size_t &index, const size_t &dim) {
size_t MaximumCPUKernel<T>::Index(const size_t &index, const size_t &dim) const {
return dim == 1 ? 0 : index;
}

@@ -158,10 +154,7 @@ void MaximumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1,
const size_t r1, const size_t r2, const size_t r3, const size_t r4,
const size_t r5, const size_t r6, const size_t d0, const size_t d1,
const size_t d2, const size_t d3, const size_t d4, const size_t d5,
const size_t d6, const T *input_x, const T *input_y, T *output) {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
const size_t d6, const T *input_x, const T *input_y, T *output) const {
for (size_t pos = 0; pos < output_num_; pos++) {
size_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0;
size_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1;
@@ -190,10 +183,7 @@ void MaximumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1,
}

template <typename T>
void MaximumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
void MaximumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const {
if (input_x_shape_.size() == 0) {
for (size_t i = 0; i < output_num_; ++i) {
output[i] = MaximumFunc(input_x[0], input_y[i]);
@@ -206,10 +196,7 @@ void MaximumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, con
}

template <typename T>
void MaximumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
void MaximumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const {
for (size_t i = 0; i < output_num_; ++i) {
output[i] = MaximumFunc(input_x[i], input_y[i]);
}


+ 11
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_CPU_KERNEL_H_

@@ -34,11 +35,9 @@ class MaximumCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);

bool IsBroadcast();
bool IsBroadcast() const;

size_t Index(const size_t &index, const size_t &dim);
size_t Index(const size_t &index, const size_t &dim) const;

void InitTensorBroadcastShape();

@@ -51,15 +50,15 @@ class MaximumCPUKernel : public CPUKernel {
const size_t l5, const size_t l6, const size_t r0, const size_t r1, const size_t r2,
const size_t r3, const size_t r4, const size_t r5, const size_t r6, const size_t d0,
const size_t d1, const size_t d2, const size_t d3, const size_t d4, const size_t d5,
const size_t d6, const T *input_x, const T *input_y, T *output);
const size_t d6, const T *input_x, const T *input_y, T *output) const;

T MaximumFunc(const T &lhs, const T &rhs) { return lhs > rhs ? lhs : rhs; }
T MaximumFunc(const T &lhs, const T &rhs) const { return lhs > rhs ? lhs : rhs; }

void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output);
void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const;

void BroadcastArithTensors(const T *input_x, const T *input_y, T *output);
void BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const;

void BroadcastArith(const T *input_x, const T *input_y, T *output);
void BroadcastArith(const T *input_x, const T *input_y, T *output) const;

private:
bool need_broadcast_{false};
@@ -72,7 +71,7 @@ class MaximumCPUKernel : public CPUKernel {
std::vector<size_t> broadcast_input_x_shape_;
std::vector<size_t> broadcast_input_y_shape_;
std::vector<size_t> broadcast_output_shape_;
const size_t max_dims{7};
const size_t max_dims_{7};
};

MS_REG_CPU_KERNEL_T(Maximum, KernelAttr(), MaximumCPUKernel, int32_t);
@@ -84,4 +83,4 @@ MS_REG_CPU_KERNEL_T(Maximum, KernelAttr(), MaximumCPUKernel, double);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UPDATE_CACHE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_CPU_KERNEL_H_

+ 10
- 13
mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,6 +21,9 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMaximumGradInputsNum = 3;
constexpr size_t kMaximumGradOutputsNum = 2;

void CheckShape(std::vector<size_t> *shape) {
MS_EXCEPTION_IF_NULL(shape);
if (shape->empty()) {
@@ -30,7 +33,8 @@ void CheckShape(std::vector<size_t> *shape) {
} // namespace

void MaximumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
y_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
dout_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
@@ -45,6 +49,8 @@ void MaximumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool MaximumGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaximumGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaximumGradOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeUInt32) {
@@ -57,6 +63,8 @@ bool MaximumGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
LaunchKernel<uint64_t>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat64) {
LaunchKernel<double>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
}
return true;
}
@@ -145,16 +153,5 @@ void MaximumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, c
MaximumGradRecTask<T>(x_addr, y_addr, dout_addr, dx_addr, dy_addr, 0, 0, 0, 0, x_cargo, y_cargo, dout_cargo, x_shape,
y_shape, dout_shape);
}

void MaximumGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaximumGradCPUKernel needs 3 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 2) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaximumGradCPUKernel needs 2 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 7
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,9 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUMGRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUMGRAD_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_GRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,11 +35,10 @@ class MaximumGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
std::vector<size_t> y_shape_;
std::vector<size_t> dout_shape;
@@ -50,4 +50,5 @@ class MaximumGradCPUKernel : public CPUKernel {
MS_REG_CPU_KERNEL(MaximumGrad, KernelAttr(), MaximumGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MaximumGrad_CPU_KERNEL_H_

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_GRAD_CPU_KERNEL_H_

+ 20
- 33
mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,9 +19,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMinimumInputsNum = 2;
constexpr size_t kMinimumOutputsNum = 1;
} // namespace

template <typename T>
void MinimumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
input_y_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
@@ -42,18 +48,6 @@ void MinimumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
}
}

template <typename T>
void MinimumCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MinimumCPUKernel needs 2 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MinimumCPUKernel needs 1 output.";
}
}

template <typename T>
void MinimumCPUKernel<T>::InitInputTensorAndScalar(size_t max_input_shape_size) {
if (max_input_shape_size != output_shape_.size()) {
@@ -77,6 +71,8 @@ void MinimumCPUKernel<T>::InitInputTensors(TypeId input_x_dtype, TypeId input_y_
template <typename T>
bool MinimumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMinimumInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMinimumOutputsNum, kernel_name_);
T *input_x_ = reinterpret_cast<T *>(inputs[0]->addr);
T *input_y_ = reinterpret_cast<T *>(inputs[1]->addr);
T *output_ = reinterpret_cast<T *>(outputs[0]->addr);
@@ -85,7 +81,7 @@ bool MinimumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
}

template <typename T>
void MinimumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) {
void MinimumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) const {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
@@ -108,7 +104,7 @@ void MinimumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *
}

template <typename T>
bool MinimumCPUKernel<T>::IsBroadcast() {
bool MinimumCPUKernel<T>::IsBroadcast() const {
if (input_x_shape_.size() != input_y_shape_.size()) {
return true;
}
@@ -122,12 +118,12 @@ bool MinimumCPUKernel<T>::IsBroadcast() {

template <typename T>
void MinimumCPUKernel<T>::InitTensorBroadcastShape() {
if (output_shape_.size() > max_dims) {
if (output_shape_.size() > max_dims_) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7";
}
broadcast_input_x_shape_.resize(max_dims, 1);
broadcast_input_y_shape_.resize(max_dims, 1);
broadcast_output_shape_.resize(max_dims, 1);
broadcast_input_x_shape_.resize(max_dims_, 1);
broadcast_input_y_shape_.resize(max_dims_, 1);
broadcast_output_shape_.resize(max_dims_, 1);
for (size_t i = 0; i < output_shape_.size(); i++) {
broadcast_output_shape_[i] = output_shape_[i];
}
@@ -147,7 +143,7 @@ void MinimumCPUKernel<T>::InitTensorBroadcastShape() {

// Broadcast comparison
template <typename T>
size_t MinimumCPUKernel<T>::Index(const size_t &index, const size_t &dim) {
size_t MinimumCPUKernel<T>::Index(const size_t &index, const size_t &dim) const {
return dim == 1 ? 0 : index;
}

@@ -158,10 +154,7 @@ void MinimumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1,
const size_t r1, const size_t r2, const size_t r3, const size_t r4,
const size_t r5, const size_t r6, const size_t d0, const size_t d1,
const size_t d2, const size_t d3, const size_t d4, const size_t d5,
const size_t d6, const T *input_x, const T *input_y, T *output) {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
const size_t d6, const T *input_x, const T *input_y, T *output) const {
for (size_t pos = 0; pos < output_num_; pos++) {
size_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0;
size_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1;
@@ -190,10 +183,7 @@ void MinimumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1,
}

template <typename T>
void MinimumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
void MinimumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const {
if (input_x_shape_.size() == 0) {
for (size_t i = 0; i < output_num_; ++i) {
output[i] = MinimumFunc(input_x[0], input_y[i]);
@@ -206,10 +196,7 @@ void MinimumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, con
}

template <typename T>
void MinimumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) {
MS_EXCEPTION_IF_NULL(input_x);
MS_EXCEPTION_IF_NULL(input_y);
MS_EXCEPTION_IF_NULL(output);
void MinimumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const {
for (size_t i = 0; i < output_num_; ++i) {
output[i] = MinimumFunc(input_x[i], input_y[i]);
}


+ 11
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUM_CPU_KERNEL_H_

@@ -34,11 +35,9 @@ class MinimumCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);

bool IsBroadcast();
bool IsBroadcast() const;

size_t Index(const size_t &index, const size_t &dim);
size_t Index(const size_t &index, const size_t &dim) const;

void InitTensorBroadcastShape();

@@ -51,15 +50,15 @@ class MinimumCPUKernel : public CPUKernel {
const size_t l5, const size_t l6, const size_t r0, const size_t r1, const size_t r2,
const size_t r3, const size_t r4, const size_t r5, const size_t r6, const size_t d0,
const size_t d1, const size_t d2, const size_t d3, const size_t d4, const size_t d5,
const size_t d6, const T *input_x, const T *input_y, T *output);
const size_t d6, const T *input_x, const T *input_y, T *output) const;

T MinimumFunc(const T &lhs, const T &rhs) { return lhs < rhs ? lhs : rhs; }
T MinimumFunc(const T &lhs, const T &rhs) const { return lhs < rhs ? lhs : rhs; }

void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output);
void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const;

void BroadcastArithTensors(const T *input_x, const T *input_y, T *output);
void BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const;

void BroadcastArith(const T *input_x, const T *input_y, T *output);
void BroadcastArith(const T *input_x, const T *input_y, T *output) const;

private:
bool need_broadcast_{false};
@@ -72,7 +71,7 @@ class MinimumCPUKernel : public CPUKernel {
std::vector<size_t> broadcast_input_x_shape_;
std::vector<size_t> broadcast_input_y_shape_;
std::vector<size_t> broadcast_output_shape_;
const size_t max_dims{7};
const size_t max_dims_{7};
};

MS_REG_CPU_KERNEL_T(Minimum, KernelAttr(), MinimumCPUKernel, int32_t);
@@ -84,4 +83,4 @@ MS_REG_CPU_KERNEL_T(Minimum, KernelAttr(), MinimumCPUKernel, double);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UPDATE_CACHE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUM_CPU_KERNEL_H_

+ 13
- 18
mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,6 +21,9 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMinimumGradInputsNum = 3;
constexpr size_t kMinimumGradOutputsNum = 2;

void GetCargo(std::vector<size_t> *cargo, const std::vector<size_t> &shape, const std::vector<size_t> &dout_shape) {
int i = dout_shape.size() - 1;
int j = shape.size() - 1;
@@ -58,7 +61,8 @@ void CheckShape(std::vector<size_t> *shape) {
} // namespace

void MinimumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
y_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
dout_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
@@ -73,6 +77,8 @@ void MinimumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool MinimumGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMinimumGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMinimumGradOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeUInt32) {
@@ -115,11 +121,11 @@ void MinimumGradRecTask(const T *x, const T *y, const T *dout, T *dx, T *dy, con

template <typename T>
void MinimumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
auto x_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto y_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto dout_addr = reinterpret_cast<T *>(inputs[2]->addr);
auto dx_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto dy_addr = reinterpret_cast<T *>(outputs[1]->addr);
auto *x_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *y_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto *dout_addr = reinterpret_cast<T *>(inputs[2]->addr);
auto *dx_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto *dy_addr = reinterpret_cast<T *>(outputs[1]->addr);

size_t x_tensor_len = GetTensorLen(x_shape_);
size_t y_tensor_len = GetTensorLen(y_shape_);
@@ -146,16 +152,5 @@ void MinimumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, c
MinimumGradRecTask<T>(x_addr, y_addr, dout_addr, dx_addr, dy_addr, 0, 0, 0, 0, x_cargo, y_cargo, dout_cargo, x_shape,
y_shape, dout_shape);
}

void MinimumGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MinimumGradCPUKernel needs 3 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 2) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MinimumGradCPUKernel needs 2 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/minimum_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUMGRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUMGRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,11 +35,10 @@ class MinimumGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
std::vector<size_t> y_shape_;
std::vector<size_t> dout_shape;
@@ -50,4 +50,4 @@ class MinimumGradCPUKernel : public CPUKernel {
MS_REG_CPU_KERNEL(MinimumGrad, KernelAttr(), MinimumGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MinimumGrad_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUMGRAD_CPU_KERNEL_H_

+ 14
- 17
mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc View File

@@ -33,9 +33,13 @@ constexpr int TOP = 0;
constexpr int BOTTOM = 1;
constexpr int LEFT = 0;
constexpr int RIGHT = 1;
constexpr size_t kMirrorPadInputsNum = 2;
constexpr size_t kMirrorPadOutputsNum = 1;
} // namespace

void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::string mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "mode");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (mode == "REFLECT") {
@@ -50,12 +54,10 @@ void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
shape_size_ = input_shape.size();
if (shape_size_ == 4) { // shape adjustment from 2d/3d to 4d
} else if (shape_size_ == 3) {
auto it = input_shape.begin();
input_shape.insert(it, 1); // batch padding
(void)input_shape.insert(input_shape.begin(), 1); // batch padding
shape_size_ = 4;
} else if (shape_size_ == 2) {
auto it = input_shape.begin();
input_shape.insert(it, 2, 1); // channel padding
(void)input_shape.insert(input_shape.begin(), 2, 1); // channel padding
shape_size_ = 4;
}

@@ -63,6 +65,7 @@ void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
tensor_size_ *= input_shape[i];
input_shape_.push_back(SizeToLong(input_shape[i]));
}

std::vector<size_t> padding_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
num_paddings_ = SizeToLong(padding_shape[0]);

@@ -74,6 +77,7 @@ void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {

int64_t max_width = input_shape_[3];
int64_t max_height = input_shape_[2];

if (mode_ == 1) { // symmetric
max_width = max_width + (2 * max_width);
max_height = max_height + (2 * max_height);
@@ -97,6 +101,8 @@ void extract_paddings(const int64_t *paddings_arg, int64_t padd_dim, int64_t *ex

bool MirrorPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMirrorPadInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMirrorPadOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -112,7 +118,8 @@ bool MirrorPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, c
}

template <typename T>
void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) const {
auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);
int64_t *paddings_arg = reinterpret_cast<int64_t *>(inputs[1]->addr);
auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr);
@@ -126,6 +133,7 @@ void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con
const int64_t padded_height = output_shape_[dim_offset];
const int64_t padded_width = output_shape_[dim_offset + 1];
const int64_t padd_dim = num_paddings_;

const int64_t mode = mode_;

int64_t paddings[MAX_PADDINGS * PADDING_SIZE]; // local and fixed size to keep in registers
@@ -190,16 +198,5 @@ void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con
outputs_addr[pos] = inputs_addr[pos_index];
}
}

void MirrorPadCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MirrorPadCPUKernel needs 2 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MirrorPadCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 8
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h View File

@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -35,19 +36,18 @@ class MirrorPadCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const;

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
size_t tensor_size_ = 1;
size_t shape_size_;
size_t output_size_ = 1;
size_t tensor_size_{1};
size_t shape_size_{0};
size_t output_size_{1};
int64_t mode_{0};
int64_t num_paddings_{0};
std::vector<int64_t> input_shape_;
std::vector<int64_t> output_shape_;
int64_t mode_;
int64_t num_paddings_;
};

MS_REG_CPU_KERNEL(


+ 35
- 45
mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc View File

@@ -33,8 +33,28 @@ constexpr int TOP = 0;
constexpr int BOTTOM = 1;
constexpr int LEFT = 0;
constexpr int RIGHT = 1;
constexpr size_t kMirrorPadGradInputsNum = 2;
constexpr size_t kMirrorPadGradOutputsNum = 1;

void extract_paddings(const int64_t *paddings_arg, int64_t padd_dim, int64_t *extracted_paddings) {
const int64_t paddings_offset = MAX_PADDINGS - padd_dim;
for (int64_t i = 0; i < padd_dim; i++) {
extracted_paddings[(paddings_offset + i) * PADDING_SIZE] = paddings_arg[i * PADDING_SIZE];
extracted_paddings[(paddings_offset + i) * PADDING_SIZE + 1] = paddings_arg[i * PADDING_SIZE + 1];
}
}

bool range_check(int64_t x, int64_t y, int64_t padded_width, int64_t padded_height) {
if (((x >= 0) && (x <= padded_width - 1)) && ((y >= 0) && (y <= padded_height - 1))) {
return true;
}
return false;
}
} // namespace

void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::string mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "mode");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (mode == "REFLECT") {
@@ -49,12 +69,10 @@ void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
shape_size_ = input_shape.size();
if (shape_size_ == 4) { // shape adjustment from 2d/3d to 4d
} else if (shape_size_ == 3) {
auto it = input_shape.begin();
input_shape.insert(it, 1); // batch padding
(void)input_shape.insert(input_shape.begin(), 1); // batch padding
shape_size_ = 4;
} else if (shape_size_ == 2) {
auto it = input_shape.begin();
input_shape.insert(it, 2, 1); // channel padding
(void)input_shape.insert(input_shape.begin(), 2, 1); // channel padding
shape_size_ = 4;
}

@@ -70,11 +88,9 @@ void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {

if (output_shape.size() == 4) {
} else if (output_shape.size() == 3) {
auto it = output_shape.begin();
output_shape.insert(it, 1); // batch padding
(void)output_shape.insert(output_shape.begin(), 1); // batch padding
} else if (output_shape.size() == 2) {
auto it = output_shape.begin();
output_shape.insert(it, 2, 1); // channel padding
(void)output_shape.insert(output_shape.begin(), 2, 1); // channel padding
}
for (auto x : output_shape) {
output_size_ *= x;
@@ -103,24 +119,11 @@ void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
}
}

void extract_paddings_(const int64_t *paddings_arg, int64_t padd_dim, int64_t *extracted_paddings) {
const int64_t paddings_offset = MAX_PADDINGS - padd_dim;
for (int64_t i = 0; i < padd_dim; i++) {
extracted_paddings[(paddings_offset + i) * PADDING_SIZE] = paddings_arg[i * PADDING_SIZE];
extracted_paddings[(paddings_offset + i) * PADDING_SIZE + 1] = paddings_arg[i * PADDING_SIZE + 1];
}
}

bool range_check(int64_t x, int64_t y, int64_t padded_width, int64_t padded_height) {
if (((x >= 0) && (x <= padded_width - 1)) && ((y >= 0) && (y <= padded_height - 1))) {
return true;
}
return false;
}

bool MirrorPadGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMirrorPadGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMirrorPadGradOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, workspace, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -158,12 +161,12 @@ template <typename T>
void MirrorPadGradCPUKernel::MirrorPadGrad_Width_Height(const size_t size, const T *interim_dy, const int64_t dx_height,
const int64_t dx_width, const int64_t dy_height,
const int64_t dy_width, const int64_t padd_dim,
const int64_t *paddings_arg, int64_t mode, T *dx) {
const int64_t *paddings_arg, int64_t mode, T *dx) const {
int64_t paddings[MAX_PADDINGS * PADDING_SIZE]; // local and fixed size to keep in registers
for (int i = 0; i < MAX_PADDINGS * PADDING_SIZE; i++) {
paddings[i] = 0; // init all to 0
}
extract_paddings_(paddings_arg, padd_dim, paddings);
extract_paddings(paddings_arg, padd_dim, paddings);
// Create required anchor points for non-mirrored data inside new tensor
int64_t ap1_x = paddings[WIDTH];
int64_t ap2_x = paddings[WIDTH] + dx_width - 1;
@@ -216,7 +219,6 @@ void MirrorPadGradCPUKernel::MirrorPadGrad_Width_Height(const size_t size, const
}
}
}
return;
}

template <typename T>
@@ -224,12 +226,12 @@ void MirrorPadGradCPUKernel::MirrorPadGradBatchChannel(const size_t size, T *dy,
const int64_t dx_batches, const int64_t dx_channels,
const int64_t dy_height, const int64_t dy_width,
const int64_t padd_dim, const int64_t *paddings_arg,
int64_t mode) {
int64_t mode) const {
int64_t paddings[MAX_PADDINGS * PADDING_SIZE]; // local and fixed size to keep in registers
for (int i = 0; i < MAX_PADDINGS * PADDING_SIZE; i++) {
paddings[i] = 0; // init all to 0
}
extract_paddings_(paddings_arg, padd_dim, paddings);
extract_paddings(paddings_arg, padd_dim, paddings);
// Create anchor points for non mirrored data inside new tensor
int64_t ap1_channel = paddings[CHANNEL];
int64_t ap2_channel = paddings[CHANNEL] + dx_channels - 1;
@@ -273,17 +275,16 @@ void MirrorPadGradCPUKernel::MirrorPadGradBatchChannel(const size_t size, T *dy,
}
}
}
return;
}

template <typename T>
void MirrorPadGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);
int64_t *paddings = reinterpret_cast<int64_t *>(inputs[1]->addr);
auto interim = reinterpret_cast<T *>(workspace[0]->addr);
auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr);
const std::vector<AddressPtr> &outputs) const {
auto *inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *paddings = reinterpret_cast<int64_t *>(inputs[1]->addr);
auto *interim = reinterpret_cast<T *>(workspace[0]->addr);
auto *outputs_addr = reinterpret_cast<T *>(outputs[0]->addr);

MirrorPadGradBatchChannel(workspace_size_, inputs_addr, interim, output_shape_[0], output_shape_[1], input_shape_[2],
input_shape_[3], num_paddings_, paddings, mode_);
@@ -291,16 +292,5 @@ void MirrorPadGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
MirrorPadGrad_Width_Height(output_size_, interim, output_shape_[2], output_shape_[3], input_shape_[2],
input_shape_[3], num_paddings_, paddings, mode_, outputs_addr);
}

void MirrorPadGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MirrorPadGradCPUKernel needs 2 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MirrorPadGradCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 11
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h View File

@@ -16,13 +16,13 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
#include <string>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class MirrorPadGradCPUKernel : public CPUKernel {
@@ -36,34 +36,33 @@ class MirrorPadGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void InitWorkspaceSize();

template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);
const std::vector<AddressPtr> &outputs) const;

template <typename T>
void MirrorPadGrad_Width_Height(const size_t size, const T *interim_dy, const int64_t dx_height,
const int64_t dx_width, const int64_t dy_height, const int64_t dy_width,
const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode, T *dx);
const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode, T *dx) const;

template <typename T>
void MirrorPadGradBatchChannel(const size_t size, T *dy, T *interim_dy, const int64_t dx_batches,
const int64_t dx_channels, const int64_t dy_height, const int64_t dy_width,
const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode);
const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode) const;

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
size_t tensor_size_ = 1;
size_t shape_size_;
size_t output_size_ = 1;
size_t workspace_size_ = 1;
size_t tensor_size_{1};
size_t shape_size_{1};
size_t output_size_{1};
size_t workspace_size_{1};
int mode_{0};
int64_t num_paddings_{0};
std::vector<int64_t> input_shape_;
std::vector<int64_t> output_shape_;
int64_t mode_;
int64_t num_paddings_;
};

MS_REG_CPU_KERNEL(


+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.h"
#include <string>
#include <utility>


+ 7
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ namespace kernel {
constexpr size_t kPoolingMinDim = 4;
constexpr size_t kPoolingMaxDim = 5;
constexpr size_t kPoolingOffsetDim = 2;

constexpr size_t kPoolingInputsNum = 1;
constexpr size_t kPoolingOutputsNum = 1;
void PoolingCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
(void)workspace_size_list_.emplace_back(workspace_size_);
@@ -33,6 +34,7 @@ void PoolingCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {

void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
@@ -78,8 +80,7 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
dnnl::pooling_forward::desc desc =
dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_max, src_desc, dst_desc,
strides_dims, kernels_dims, padding_l, padding_r);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == prim::kPrimAvgPool->name() || kernel_name == prim::kPrimAvgPool3D->name()) {
if (kernel_name_ == prim::kPrimAvgPool->name()) {
desc = dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_avg, src_desc,
dst_desc, strides_dims, kernels_dims, padding_l, padding_r);
}
@@ -94,9 +95,8 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool PoolingCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kPoolingInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kPoolingOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
SetArgumentHandle(DNNL_ARG_WORKSPACE, workspace[0]->addr);


+ 2
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_CPU_KERNEL_H_

@@ -45,8 +46,6 @@ MS_REG_CPU_KERNEL(MaxPool3D, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOu
PoolingCPUKernel);
MS_REG_CPU_KERNEL(AvgPool, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
PoolingCPUKernel);
MS_REG_CPU_KERNEL(AvgPool3D, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
PoolingCPUKernel);
} // namespace kernel
} // namespace mindspore



+ 4
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.h"
#include <string>
#include <utility>
@@ -117,13 +118,13 @@ bool MaxPoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaxPoolingGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaxPoolingGradOutputsNum, kernel_name_);

auto input = reinterpret_cast<float *>(inputs[0]->addr);
auto diff = reinterpret_cast<float *>(inputs[2]->addr);
auto output = reinterpret_cast<float *>(outputs[0]->addr);
auto ret = memset_s(output, outputs[0]->size, 0, outputs[0]->size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "Pooling grad memset error!";
MS_LOG(EXCEPTION) << "Pooling grad memset error, ret value:" << ret << ", output address: " << output
<< ", memset size: " << outputs[0]->size;
}
size_t src_wh = src_shape_[2] * src_shape_[3];
size_t dst_wh = dst_shape_[2] * dst_shape_[3];


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.


+ 10
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h"
#include <algorithm>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
@@ -21,8 +22,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSoftmaxInputsNum = 1;
constexpr size_t kSoftmaxOutputsNum = 1;
} // namespace

void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<int> axis_list;
std::vector<int64_t> axis_list_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS);
@@ -48,9 +55,8 @@ void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool SoftmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "Softmax error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSoftmaxInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSoftmaxOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();


+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CPU_KERNEL_H_



+ 19
- 9
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h"
#include <numeric>
#include <limits>
#include <functional>
#include <cmath>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
@@ -23,6 +25,12 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSoftmaxCrossEntropyWithLogitsInputsNum = 2;
constexpr size_t kSoftmaxCrossEntropyWithLogitsOutputsNum = 2;
constexpr size_t kSoftmaxCrossEntropyWithLogitsWorkspaceSize = 1;
} // namespace

void SoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
@@ -34,9 +42,10 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr

void SoftmaxCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
dnnl::memory::dims mem_dims;
mem_dims.insert(mem_dims.end(), shape.begin(), shape.end());
(void)mem_dims.insert(mem_dims.end(), shape.begin(), shape.end());
if (mem_dims.size() != 2) {
MS_LOG(EXCEPTION) << "SoftmaxCrossEntropyWithLogits kernel dims invalid " << mem_dims.size();
}
@@ -73,9 +82,10 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *log
bool SoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || workspace.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSoftmaxCrossEntropyWithLogitsInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSoftmaxCrossEntropyWithLogitsOutputsNum, kernel_name_);
CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSoftmaxCrossEntropyWithLogitsWorkspaceSize, kernel_name_);

size_t batch_float_size = batch_size_ * sizeof(float);
size_t batch_class_float_size = class_num_ * batch_float_size;
if (inputs[0]->size != workspace[0]->size || inputs[0]->size != batch_class_float_size ||
@@ -88,10 +98,10 @@ bool SoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::Ad
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, workspace[0]->addr);
ExecutePrimitive();
auto labels = reinterpret_cast<float *>(inputs[1]->addr);
auto logits = reinterpret_cast<float *>(workspace[0]->addr);
auto output1 = reinterpret_cast<float *>(outputs[0]->addr);
auto output2 = reinterpret_cast<float *>(outputs[1]->addr);
const auto *labels = reinterpret_cast<float *>(inputs[1]->addr);
const auto *logits = reinterpret_cast<float *>(workspace[0]->addr);
auto *output1 = reinterpret_cast<float *>(outputs[0]->addr);
auto *output2 = reinterpret_cast<float *>(outputs[1]->addr);
ForwardPostExecute(logits, labels, output1, output2);
return true;
}


+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_



+ 19
- 10
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h"
#include <numeric>
#include <limits>
#include <functional>
#include <cmath>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
@@ -23,6 +25,12 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseSoftmaxCrossEntropyWithLogitsInputsNum = 2;
constexpr size_t kSparseSoftmaxCrossEntropyWithLogitsOutputsNum = 1;
constexpr size_t kSparseSoftmaxCrossEntropyWithLogitsWorkspaceSize = 1;
} // namespace

void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
@@ -34,13 +42,14 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNo

void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> label_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (label_shape.size() > 1) {
MS_LOG(EXCEPTION) << "Labels shape length should be equal to Logits shape length minus 1";
}
dnnl::memory::dims mem_dims;
mem_dims.insert(mem_dims.end(), shape.begin(), shape.end());
(void)mem_dims.insert(mem_dims.end(), shape.begin(), shape.end());
if (mem_dims.size() != 2) {
MS_LOG(EXCEPTION) << "SparseSoftmaxCrossEntropyWithLogits kernel dims invalid " << mem_dims.size();
}
@@ -66,7 +75,7 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int
float epsilon = std::numeric_limits<float>::min();
for (size_t i = 0; i < batch_size_; ++i) {
if (labels[i] < 0) {
MS_LOG(EXCEPTION) << "Label value must >= 0!";
MS_LOG(EXCEPTION) << "Label value must >= 0";
}
size_t label = IntToSize(labels[i]);
if (label > class_num_) {
@@ -82,7 +91,7 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *la
size_t row_start = 0;
for (size_t i = 0; i < batch_size_; ++i) {
if (labels[i] < 0) {
MS_LOG(EXCEPTION) << "Label value must >= 0!";
MS_LOG(EXCEPTION) << "Label value must >= 0";
}
size_t label = IntToSize(labels[i]);
if (label > class_num_) {
@@ -103,9 +112,9 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *la
bool SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || workspace.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseSoftmaxCrossEntropyWithLogitsInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseSoftmaxCrossEntropyWithLogitsOutputsNum, kernel_name_);
CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseSoftmaxCrossEntropyWithLogitsWorkspaceSize, kernel_name_);
size_t batch_float_size = batch_size_ * sizeof(float);
size_t batch_class_float_size = class_num_ * batch_float_size;
if (inputs[0]->size != workspace[0]->size || inputs[0]->size != batch_class_float_size ||
@@ -120,9 +129,9 @@ bool SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kern
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, workspace[0]->addr);
ExecutePrimitive();
auto labels = reinterpret_cast<int *>(inputs[1]->addr);
auto losses = reinterpret_cast<float *>(workspace[0]->addr);
auto output = reinterpret_cast<float *>(outputs[0]->addr);
const auto *labels = reinterpret_cast<int *>(inputs[1]->addr);
const auto *losses = reinterpret_cast<float *>(workspace[0]->addr);
auto *output = reinterpret_cast<float *>(outputs[0]->addr);
if (is_grad_) {
GradPostExecute(labels, losses, output);
} else {


+ 3
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_

@@ -32,10 +33,8 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;

private:
void InitInputOutputSize(const CNodePtr &kernel_node) override;
void ForwardPostExecute(const int *labels, const float *losses, float *output) const;
void GradPostExecute(const int *labels, const float *losses, float *output) const;
bool is_grad_{false};


+ 13
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,13 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/one_hot_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kOneHotInputsNum = 3;
constexpr size_t kOneHotOutputsNum = 1;
} // namespace

void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (output_shape.size() < 2) {
MS_LOG(EXCEPTION) << "Invalid output shape size: " << output_shape.size();
@@ -28,6 +35,7 @@ void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {
if (axis != -1 && LongToSize(axis) >= output_shape.size()) {
MS_LOG(EXCEPTION) << "Invalid axis: " << axis;
}

if (axis == -1) {
axis_ = output_shape.size() - 1;
} else {
@@ -42,13 +50,12 @@ void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool OneHotCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 3 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Input or output invalid!";
}
auto indices = reinterpret_cast<int *>(inputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kOneHotInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOneHotOutputsNum, kernel_name_);
const auto *indices = reinterpret_cast<int *>(inputs[0]->addr);
auto on_value = reinterpret_cast<float *>(inputs[1]->addr)[0];
auto off_value = reinterpret_cast<float *>(inputs[2]->addr)[0];
auto output = reinterpret_cast<float *>(outputs[0]->addr);
auto *output = reinterpret_cast<float *>(outputs[0]->addr);
size_t elem_num = inputs[0]->size / sizeof(int);

auto task = [this, &indices, &on_value, &off_value, &output](size_t start, size_t end) {


+ 6
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ONE_HOT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ONE_HOT_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -33,9 +35,9 @@ class OneHotCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
size_t depth_;
size_t stride_;
size_t axis_;
size_t depth_{0};
size_t stride_{0};
size_t axis_{0};
};

MS_REG_CPU_KERNEL(OneHot, KernelAttr(), OneHotCPUKernel);


+ 10
- 22
mindspore/ccsrc/backend/kernel_compiler/cpu/pack_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,17 +20,16 @@

namespace mindspore {
namespace kernel {
template <typename T>
PackCpuFwdKernel<T>::PackCpuFwdKernel()
: axis_(0), input_num_(1), output_size_(0), dims_behind_axis_(1), inputs_host_(nullptr) {}
namespace {
constexpr size_t kPackOutputsNum = 1;
} // namespace

template <typename T>
void PackCpuFwdKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);

axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_num_ = AnfAlgo::GetInputTensorNum(kernel_node);
axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
if (axis_ < 0) {
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
axis_ += (SizeToInt(input_shape.size()) + 1);
@@ -52,11 +51,9 @@ void PackCpuFwdKernel<T>::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
bool PackCpuFwdKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
if (!CheckParam(outputs)) {
return false;
}
auto output = reinterpret_cast<T *>(outputs[0]->addr);

CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num_, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kPackOutputsNum, kernel_name_);
auto *output = reinterpret_cast<T *>(outputs[0]->addr);
inputs_host_ = std::make_unique<T *[]>(input_num_);
for (size_t i = 0; i < inputs.size(); i++) {
inputs_host_[i] = reinterpret_cast<T *>(inputs[i]->addr);
@@ -90,16 +87,7 @@ bool PackCpuFwdKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const st
}

template <typename T>
bool PackCpuFwdKernel<T>::CheckParam(const std::vector<AddressPtr> &outputs) const {
if (outputs.size() != 1) {
MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but PackGpuFwdKernel needs 1 output.";
return false;
}
return true;
}

template <typename T>
void PackCpuFwdKernel<T>::PackTensor(T *output, size_t start, size_t end) {
void PackCpuFwdKernel<T>::PackTensor(T *output, size_t start, size_t end) const {
for (size_t pos = start; pos < end; ++pos) {
size_t cur_input_index = pos / dims_behind_axis_ % input_num_;
size_t cycle_len = input_num_ * dims_behind_axis_;


+ 12
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/pack_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_PACK_CPU_KERNEL_H
#define MINDSPORE_PACK_CPU_KERNEL_H

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PACK_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PACK_CPU_KERNEL_H_

#include <vector>
#include <memory>
@@ -26,7 +27,7 @@ namespace kernel {
template <typename T>
class PackCpuFwdKernel : public CPUKernel {
public:
PackCpuFwdKernel();
PackCpuFwdKernel() = default;
~PackCpuFwdKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
@@ -34,14 +35,13 @@ class PackCpuFwdKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
bool CheckParam(const std::vector<AddressPtr> &outputs) const;
void PackTensor(T *output, size_t start, size_t end);
void PackTensor(T *output, size_t start, size_t end) const;

int axis_;
size_t input_num_;
size_t output_size_;
size_t dims_behind_axis_;
std::unique_ptr<T *[]> inputs_host_;
int axis_{0};
size_t input_num_{1};
size_t output_size_{0};
size_t dims_behind_axis_{1};
std::unique_ptr<T *[]> inputs_host_ { nullptr };
};

MS_REG_CPU_KERNEL_T(Stack, KernelAttr(), PackCpuFwdKernel, int8_t)
@@ -57,4 +57,4 @@ MS_REG_CPU_KERNEL_T(Stack, KernelAttr(), PackCpuFwdKernel, float)
MS_REG_CPU_KERNEL_T(Stack, KernelAttr(), PackCpuFwdKernel, bool)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_PACK_CPU_KERNEL_H
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PACK_CPU_KERNEL_H_

+ 16
- 22
mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc View File

@@ -19,7 +19,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kPadInputsNum = 1;
constexpr size_t kPadOutputsNum = 1;
} // namespace

void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
paddings_ = AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "paddings");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@@ -27,12 +34,10 @@ void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
shape_size_ = input_shape.size();
if (shape_size_ == 4) { // shape adjustment from 2d/3d to 4d
} else if (shape_size_ == 3) {
auto it = input_shape.begin();
input_shape.insert(it, 1); // batch padding
(void)input_shape.insert(input_shape.begin(), 1); // batch padding
shape_size_ = 4;
} else if (shape_size_ == 2) {
auto it = input_shape.begin();
input_shape.insert(it, 2, 1); // channel padding
(void)input_shape.insert(input_shape.begin(), 2, 1); // channel padding
shape_size_ = 4;
}

@@ -43,11 +48,9 @@ void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) {

if (paddings_.size() == 4) { // shape adjustment from 2d/3d to 4d
} else if (paddings_.size() == 3) {
auto it = paddings_.begin();
paddings_.insert(it, 1, {0, 0}); // batch padding
(void)paddings_.insert(paddings_.begin(), 1, {0, 0}); // batch padding
} else if (paddings_.size() == 2) {
auto it = paddings_.begin();
paddings_.insert(it, 2, {0, 0}); // channel padding
(void)paddings_.insert(paddings_.begin(), 2, {0, 0}); // channel padding
}

for (size_t i = 0; i < shape_size_; i++) {
@@ -59,6 +62,8 @@ void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool PadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kPadInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kPadOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -74,9 +79,9 @@ bool PadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const s
}

template <typename T>
void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr);
void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const {
const auto *inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *outputs_addr = reinterpret_cast<T *>(outputs[0]->addr);

const int pad_left = paddings_[3][0];
const int pad_top = paddings_[2][0];
@@ -112,16 +117,5 @@ void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std
}
}
}

void PadCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but PadCPUKernel needs 1 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but PadCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 7
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.h View File

@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,16 +35,15 @@ class PadCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const;

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<std::vector<int64_t>> paddings_;
TypeId dtype_{kTypeUnknown};
uint64_t tensor_size_ = 1;
size_t shape_size_ = 1;
uint64_t output_size_ = 1;
uint64_t tensor_size_{1};
size_t shape_size_{1};
uint64_t output_size_{1};
std::vector<std::vector<int64_t>> paddings_;
std::vector<size_t> input_shape_;
std::vector<size_t> output_shape_;
};


+ 17
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/range_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,18 +13,27 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/range_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kRangeInputsNum = 3;
constexpr size_t kRangeOutputsNum = 1;
} // namespace

void RangeCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
}

bool RangeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kRangeInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kRangeOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeInt32) {
return LaunchKernel<int32_t>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -35,19 +44,19 @@ bool RangeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const
}

template <typename T>
bool RangeCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
T start_ = reinterpret_cast<T *>(inputs[0]->addr)[0];
T limit_ = reinterpret_cast<T *>(inputs[1]->addr)[0];
T delta_ = reinterpret_cast<T *>(inputs[2]->addr)[0];
bool RangeCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const {
auto start = reinterpret_cast<T *>(inputs[0]->addr)[0];
auto limit = reinterpret_cast<T *>(inputs[1]->addr)[0];
auto delta = reinterpret_cast<T *>(inputs[2]->addr)[0];

auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
size_t elem_num = outputs[0]->size / sizeof(T);
for (size_t i = 0; i < elem_num; i++) {
T val_ = start_ + static_cast<T>(i) * delta_;
if (val_ > limit_) {
T val = start + static_cast<T>(i) * delta;
if (val > limit) {
break;
}
output_addr[i] = val_;
output_addr[i] = val;
}
return true;
}


+ 6
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/range_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANGE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANGE_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -31,10 +33,11 @@ class RangeCPUKernel : public CPUKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
template <typename T>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
template <typename T>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const;

TypeId dtype_{kTypeUnknown};
};



+ 25
- 17
mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc View File

@@ -23,13 +23,23 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kReduceSmallVectorSize = 200000;
constexpr size_t kReduceInputsNum = 1;
constexpr size_t kReduceOutputsNum = 1;
} // namespace

template <typename T>
void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS);
auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
auto axis_addr = prim->GetAttr(AXIS);
if (axis_addr == nullptr) {
MS_LOG(EXCEPTION) << "Miss attribute " << AXIS;
}
if (axis_addr->isa<ValueTuple>() || axis_addr->isa<ValueList>()) {
axis_ = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS);
} else if (axis_addr->isa<Int64Imm>()) {
@@ -39,8 +49,8 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
}

int dimension = input_shape_.size();
std::transform(axis_.begin(), axis_.end(), axis_.begin(),
[dimension](const auto &a) { return a < 0 ? dimension + a : a; });
(void)std::transform(axis_.begin(), axis_.end(), axis_.begin(),
[dimension](const auto &a) { return a < 0 ? dimension + a : a; });
sort(axis_.begin(), axis_.end());
// Delete the duplicate axis.
auto last = std::unique(axis_.begin(), axis_.end());
@@ -48,30 +58,30 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);

if constexpr (std::is_same<T, bool>::value) {
if (kernel_name == "ReduceAll") {
if (kernel_name_ == prim::kPrimReduceAll->name()) {
reduce_type_ = kReduceAll;
reduce_func_ = [](const T *input, size_t pos, T *out) { *out &= input[pos]; };
} else if (kernel_name == "ReduceAny") {
} else if (kernel_name_ == prim::kPrimReduceAny->name()) {
reduce_type_ = kReduceAny;
reduce_func_ = [](const T *input, size_t pos, T *out) { *out |= input[pos]; };
} else {
MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << fullname_ << " for bool.";
MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name_ << " for bool.";
}
} else {
if (kernel_name == "ReduceMax") {
if (kernel_name_ == prim::kPrimReduceMax->name()) {
reduce_type_ = kReduceMax;
reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::max(input[pos], *out); };
} else if (kernel_name == "ReduceMin") {
} else if (kernel_name_ == prim::kPrimReduceMin->name()) {
reduce_type_ = kReduceMin;
reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::min(input[pos], *out); };
} else if (kernel_name == "ReduceSum") {
} else if (kernel_name_ == prim::kPrimReduceSum->name()) {
reduce_type_ = kReduceSum;
reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; };
} else if (kernel_name == "ReduceMean") {
} else if (kernel_name_ == prim::kPrimReduceMean->name()) {
reduce_type_ = kReduceMean;
reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; };
} else {
MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name;
MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name_;
}
}

@@ -87,13 +97,11 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
bool ReduceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReduceInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReduceOutputsNum, kernel_name_);
size_t input_size = inputs[0]->size / sizeof(T);
if (input_size == 0) {
MS_LOG(EXCEPTION) << "Input data size is 0.";
}

auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
if (axis_.empty() || input_shape_.empty() || input_shape_.size() == 1) {
if (input_size < kReduceSmallVectorSize) {
// Get one ret


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include <string>


Loading…
Cancel
Save