Browse Source

code check for cpu ops

tags/v1.6.0
fan-jibin fanjibin 4 years ago
parent
commit
95473ee561
100 changed files with 903 additions and 870 deletions
  1. +24
    -0
      mindspore/ccsrc/backend/kernel_compiler/common_utils.h
  2. +0
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_pull_weight_kernel.h
  3. +0
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_push_weight_kernel.h
  4. +13
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/addn_cpu_kernel.cc
  5. +11
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/assignadd_cpu_kernel.cc
  6. +14
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_cpu_kernel.cc
  7. +12
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_grad_cpu_kernel.cc
  8. +19
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc
  9. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h
  10. +18
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc
  11. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h
  12. +10
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv_cpu_kernel.cc
  13. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv_cpu_kernel.h
  14. +15
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.cc
  15. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.h
  16. +15
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.cc
  17. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.h
  18. +48
    -90
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c
  19. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unsorted_segment_sum_base.c
  20. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.c
  21. +10
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.c
  22. +9
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c
  23. +10
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c
  24. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/print_cpu_kernel.cc
  25. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/print_cpu_kernel.h
  26. +12
    -14
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc
  27. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h
  28. +6
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc
  29. +2
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h
  30. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc
  31. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h
  32. +0
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h
  33. +3
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc
  34. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc
  35. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h
  36. +9
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_lazy_adam_ps_kernel.cc
  37. +15
    -27
      mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.cc
  38. +2
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.h
  39. +8
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc
  40. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h
  41. +14
    -10
      mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc
  42. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h
  43. +15
    -24
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.cc
  44. +2
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.h
  45. +16
    -24
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.cc
  46. +5
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.h
  47. +15
    -22
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.cc
  48. +1
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.h
  49. +17
    -24
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.cc
  50. +1
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.h
  51. +8
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/rmsprop_cpu_kernel.cc
  52. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/rmsprop_cpu_kernel.h
  53. +55
    -58
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.cc
  54. +19
    -21
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h
  55. +10
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc
  56. +1
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.h
  57. +5
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc
  58. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
  59. +12
    -13
      mindspore/ccsrc/backend/kernel_compiler/cpu/select_cpu_kernel.cc
  60. +5
    -18
      mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
  61. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
  62. +14
    -18
      mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_cpu_kernel.cc
  63. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_cpu_kernel.h
  64. +15
    -19
      mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.cc
  65. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.h
  66. +16
    -12
      mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc
  67. +1
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h
  68. +35
    -30
      mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc
  69. +6
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h
  70. +14
    -19
      mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_cpu_kernel.cc
  71. +2
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_cpu_kernel.h
  72. +15
    -20
      mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_grad_cpu_kernel.cc
  73. +0
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_grad_cpu_kernel.h
  74. +15
    -15
      mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.cc
  75. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.h
  76. +15
    -15
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc
  77. +10
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h
  78. +17
    -15
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc
  79. +13
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h
  80. +19
    -17
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc
  81. +9
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h
  82. +8
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc
  83. +8
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h
  84. +4
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h
  85. +16
    -10
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc
  86. +0
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.h
  87. +5
    -18
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc
  88. +0
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.h
  89. +13
    -15
      mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
  90. +7
    -13
      mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.h
  91. +33
    -32
      mindspore/ccsrc/backend/kernel_compiler/cpu/stridedslice_cpu_kernel.cc
  92. +6
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/stridedslice_cpu_kernel.h
  93. +14
    -8
      mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.cc
  94. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.h
  95. +8
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/tensor_copy_slices_cpu_kernel.cc
  96. +9
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.cc
  97. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.h
  98. +15
    -23
      mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc
  99. +3
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.h
  100. +9
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/topk_cpu_kernel.cc

+ 24
- 0
mindspore/ccsrc/backend/kernel_compiler/common_utils.h View File

@@ -144,6 +144,30 @@ std::vector<int64_t> CalDimOffset(const std::vector<int64_t> &input_shape);
size_t GetCopySize(const std::vector<int64_t> &dim_offset, const std::vector<int64_t> &start,
const std::vector<int64_t> &stop);
size_t UnitSizeInBytes(const mindspore::TypeId &t);

#define CHECK_KERNEL_INPUTS_NUM(actual_inputs_num, expect_inputs_num, kernel_name) \
do { \
if ((actual_inputs_num) != (expect_inputs_num)) { \
MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_inputs_num) << " inputs, but got " \
<< (actual_inputs_num) << "."; \
} \
} while (0)

#define CHECK_KERNEL_OUTPUTS_NUM(actual_outputs_num, expect_outputs_num, kernel_name) \
do { \
if ((actual_outputs_num) != (expect_outputs_num)) { \
MS_LOG(EXCEPTION) << (kernel_name) << " should have " << (expect_outputs_num) << " outputs, but got " \
<< (actual_outputs_num) << "."; \
} \
} while (0)

#define CHECK_KERNEL_WORKSPACE_SIZE(actual_size, expect_size, kernel_name) \
do { \
if ((actual_size) != (expect_size)) { \
MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_size) << " workspace, but got " << (actual_size) \
<< "."; \
} \
} while (0)
} // namespace kernel
} // namespace mindspore



+ 0
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_pull_weight_kernel.h View File

@@ -44,7 +44,6 @@ class FusedPullWeightKernel : public CPUKernel {
if (inputs.size() != weight_full_names_.size()) {
MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but FusedPullWeightKernel needs "
<< weight_full_names_.size() << " weights as inputs.";
return false;
}

std::shared_ptr<fl::FBBuilder> fbb = std::make_shared<fl::FBBuilder>();
@@ -67,7 +66,6 @@ class FusedPullWeightKernel : public CPUKernel {
MS_LOG(INFO) << "Launching pulling weight for federated learning iteration " << fl_iteration_;
if (!BuildPullWeightReq(fbb)) {
MS_LOG(EXCEPTION) << "Building request for FusedPullWeight failed.";
return false;
}

std::shared_ptr<std::vector<unsigned char>> pull_weight_rsp_msg = nullptr;
@@ -98,13 +96,11 @@ class FusedPullWeightKernel : public CPUKernel {
fbb = std::make_shared<fl::FBBuilder>();
if (!BuildPullWeightReq(fbb)) {
MS_LOG(EXCEPTION) << "Building request for FusedDownloadWeightsByKeys failed.";
return false;
}
continue;
} else if (retcode != schema::ResponseCode_SUCCEED) {
MS_LOG(EXCEPTION) << "FusedPullWeight failed. Server return code: " << pull_weight_rsp->retcode()
<< ", reason: " << pull_weight_rsp->reason()->str();
return false;
} else {
MS_LOG(DEBUG) << "FusedPullWeight succeed.";
}
@@ -115,13 +111,11 @@ class FusedPullWeightKernel : public CPUKernel {
const std::string &weight_name = weight_full_names_[i];
if (feature_map.count(weight_name) == 0) {
MS_LOG(EXCEPTION) << "The weights for " << weight_name << " is not pulled from server.";
return false;
}
int ret =
memcpy_s(inputs[i]->addr, inputs[i]->size, feature_map[weight_name].addr, feature_map[weight_name].size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return false;
}
}
MS_LOG(INFO) << "Pull weights for " << weight_full_names_ << " success. Iteration: " << fl_iteration_;
@@ -147,7 +141,6 @@ class FusedPullWeightKernel : public CPUKernel {
MS_LOG(EXCEPTION)
<< "Attributes of FusedPullWeightKernel are invalid: server number is 0 or weight_full_names_ is "
"empty or indices_ is UINT32_MAX.";
return;
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
@@ -186,7 +179,6 @@ class FusedPullWeightKernel : public CPUKernel {
if (fbs_feature_map->size() != weight_full_names_.size()) {
MS_LOG(EXCEPTION) << "FusedPullWeightKernel should get " << weight_full_names_.size() << " weights, but got "
<< fbs_feature_map->size() << " weights.";
return {};
}

std::map<std::string, Address> feature_map;


+ 0
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_push_weight_kernel.h View File

@@ -42,7 +42,6 @@ class FusedPushWeightKernel : public CPUKernel {
if (inputs.size() != weight_full_names_.size()) {
MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but FusedPushWeightKernel needs "
<< weight_full_names_.size() << " weights as inputs.";
return false;
}

std::shared_ptr<fl::FBBuilder> fbb = std::make_shared<fl::FBBuilder>();
@@ -65,7 +64,6 @@ class FusedPushWeightKernel : public CPUKernel {
MS_LOG(INFO) << "Launching pushing weight for federated learning iteration " << fl_iteration_;
if (!BuildPushWeightReq(fbb, inputs)) {
MS_LOG(EXCEPTION) << "Building request for FusedPushWeight failed.";
return false;
}

// The server number may change after scaling in/out.
@@ -97,13 +95,11 @@ class FusedPushWeightKernel : public CPUKernel {
<< ". Retry later.";
if (!BuildPushWeightReq(fbb, inputs)) {
MS_LOG(EXCEPTION) << "Building request for FusedPushWeight failed.";
return false;
}
continue;
} else if (retcode != schema::ResponseCode_SUCCEED) {
MS_LOG(EXCEPTION) << "FusedPushWeight failed. Server return code: " << push_weight_rsp->retcode()
<< ", reason: " << push_weight_rsp->reason()->str();
return false;
} else {
MS_LOG(DEBUG) << "FusedPushWeight succeed.";
}
@@ -132,7 +128,6 @@ class FusedPushWeightKernel : public CPUKernel {
MS_LOG(EXCEPTION)
<< "Attributes of FusedPushWeightKernel are invalid: server number is 0 or weight_full_names_ is "
"empty or indices_ is UINT32_MAX.";
return;
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);


+ 13
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/addn_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,17 +24,26 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kAddNInputsMinNum = 2;
constexpr size_t kAddNOutputsNum = 1;

void AddInt(const int *in_0, const int *in_1, int *out, int start, int end) {
int ret = ElementAddInt(in_0 + start, in_1 + start, out + start, end - start);
if (ret != NNACL_OK) {
MS_LOG(EXCEPTION) << "Add failed.";
}
}
} // namespace

void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_num_ = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num_ < kAddNInputsMinNum) {
MS_LOG(EXCEPTION) << "Input numbers should not less " << kAddNInputsMinNum << ", but got " << input_num_;
}
CheckParam(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -52,6 +61,8 @@ void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool AddNCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num_, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAddNOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat32) {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
@@ -93,10 +104,6 @@ void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "AddN input shapes must be equal.";
}
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 11
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/assignadd_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/assignadd_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
@@ -20,13 +21,19 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kAssignAddInputsNum = 2;
constexpr size_t kAssignAddOutputsNum = 1;
} // namespace

void AssignAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (src1_shape.size() == 0 && src0_shape.size() == 0) {
src0_shape.insert(src0_shape.begin(), 1);
src1_shape.insert(src1_shape.begin(), 1);
(void)src0_shape.insert(src0_shape.begin(), 1);
(void)src1_shape.insert(src1_shape.begin(), 1);
}
if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) {
MS_LOG(EXCEPTION) << "AssignAdd only support same dim input or tensor * scalar " << src0_shape.size() << " vs "
@@ -49,9 +56,8 @@ void AssignAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool AssignAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2) {
MS_LOG(EXCEPTION) << "AssignAdd error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kAssignAddInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAssignAddOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
@@ -59,7 +65,6 @@ bool AssignAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, c
auto ret = memcpy_s(inputs[0]->addr, inputs[0]->size, outputs[0]->addr, outputs[0]->size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "Memcpy_s error, errorno " << ret;
return false;
}
return true;
}


+ 14
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/batch_norm_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
@@ -20,9 +21,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kBatchNormInputsNum = 5;
constexpr size_t kBatchNormOutputsNum = 5;
constexpr size_t kBatchNormInputShapeSize = 4;
constexpr size_t kBatchNormInputShapeSize2 = 2;
} // namespace

void BatchNormCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t type_size = sizeof(float);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
size_t tensor_size = shape[1] * 2 * type_size; // [2, c] to store scale and bias
@@ -31,12 +38,13 @@ void BatchNormCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {

void BatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
is_train = AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_training");
momentum = AnfAlgo::GetNodeAttr<float>(kernel_node, "momentum");
std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (x_shape.size() == 2) {
(void)x_shape.insert(x_shape.end(), 2, 1); // expand 2 dim: NC -> NCHW
} else if (x_shape.size() != 4) {
if (x_shape.size() == kBatchNormInputShapeSize2) {
(void)x_shape.insert(x_shape.end(), kBatchNormInputShapeSize - kBatchNormInputShapeSize2, 1);
} else if (x_shape.size() != kBatchNormInputShapeSize) {
MS_LOG(EXCEPTION) << "Batchnorm only support nchw input!";
}
batch_size = x_shape[0];
@@ -67,9 +75,8 @@ void BatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool BatchNormCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 5 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kBatchNormInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kBatchNormOutputsNum, kernel_name_);
auto wksp = reinterpret_cast<float *>(workspace[0]->addr);
auto scale_ret = memcpy_s(wksp, workspace[0]->size, inputs[1]->addr, inputs[1]->size);
auto max_size = workspace[0]->size - inputs[1]->size;


+ 12
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_grad_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/batch_norm_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
@@ -20,9 +21,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kBatchNormGradInputsNum = 6;
constexpr size_t kBatchNormGradOutputsNum = 3;
constexpr size_t kBatchNormGradInputShapeSize = 4;
constexpr size_t kBatchNormGradInputShapeSize2 = 2;
} // namespace

void BatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t type_size = sizeof(float);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, Y_BACKPROP);
size_t tensor_size = shape[C] * SCALE_SHIFT_NUM * type_size;
@@ -35,6 +42,7 @@ void BatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {

void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (x_shape.size() == NC) {
(void)x_shape.insert(x_shape.end(), (NCHW - NC), 1);
@@ -76,10 +84,9 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool BatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
constexpr size_t INPUT_NUM = 5;
if (inputs.size() < INPUT_NUM || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kBatchNormGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kBatchNormGradOutputsNum, kernel_name_);

auto wksp_in = reinterpret_cast<float *>(workspace[SCALE_BIAS]->addr);
auto scale_ret = memcpy_s(wksp_in, workspace[SCALE_BIAS]->size, inputs[SCALE]->addr, inputs[SCALE]->size);
if (scale_ret != 0) {


+ 19
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
#include <string>
#include <algorithm>
@@ -22,12 +23,20 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kShapeSize2D = 2;
constexpr size_t kShapeSize4D = 4;
constexpr size_t kConv2dGradFilterInputsNum = 2;
constexpr size_t kConv2dGradFilterOutputsNum = 1;
} // namespace

void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 || weight_shape.size() != 4) {
if (src_shape.size() != kShapeSize4D || weight_shape.size() != kShapeSize4D) {
MS_LOG(EXCEPTION) << ("Conv2d grad filter only support nchw input!");
}
std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
@@ -36,7 +45,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
if (src_shape[1] % group != 0) {
MS_LOG(EXCEPTION) << "Conv2d channels should be divided by group!";
}
weight_shape.insert(weight_shape.begin(), group);
(void)weight_shape.insert(weight_shape.begin(), group);
weight_shape[1] = weight_shape[1] / group;
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
@@ -47,16 +56,19 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
[](const int64_t &value) { return static_cast<int>(value); });
[](const int64_t &value) { return LongToInt(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
[](const int64_t &value) { return static_cast<int>(value); });
[](const int64_t &value) { return LongToInt(value); });

if (dilation_ori.size() != 4) {
if (dilation_ori.size() != kShapeSize4D) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation must be 4d!";
}
if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation only support 1 in N axis and C axis!";
}
if (stride_ori.size() < kShapeSize2D) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel stride_ori should not less than 2d!";
}
std::vector<int> stride{stride_ori[0], stride_ori[1]};
std::vector<int> dilation{dilation_ori[2], dilation_ori[3]};
dnnl::memory::dims strides{stride_ori[0], stride_ori[1]};
@@ -91,9 +103,8 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool Conv2dGradFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kConv2dGradFilterInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConv2dGradFilterOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS, outputs[0]->addr);


+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_



+ 18
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
#include <string>
#include <map>
@@ -23,13 +24,21 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kConv2dGradInputInputsNum = 2;
constexpr size_t kConv2dGradInputOutputsNum = 1;
constexpr size_t kShapeSize2D = 2;
constexpr size_t kShapeSize4D = 4;
const std::map<std::string, size_t> kFormatIndexMap = {{"NCHW", 2}, {"HWCN", 0}, {"NHWC", 1}};
} // namespace

void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 || weight_shape.size() != 4) {
if (src_shape.size() != kShapeSize4D || weight_shape.size() != kShapeSize4D) {
MS_LOG(EXCEPTION) << "Conv2d grad filter only support nchw input!";
}
std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
@@ -38,7 +47,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
if (src_shape[1] % group != 0) {
MS_LOG(EXCEPTION) << "Conv2d channels should be divided by group!";
}
weight_shape.insert(weight_shape.begin(), group);
(void)weight_shape.insert(weight_shape.begin(), group);
weight_shape[1] = weight_shape[1] / group;
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
@@ -64,13 +73,15 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
[](const int64_t &value) { return static_cast<int>(value); });

if (dilation_ori.size() != 4) {
if (dilation_ori.size() != kShapeSize4D) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation must be 4d!";
}
if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation only support 1 in N axis and C axis!";
}

if (stride_ori.size() < kShapeSize2D) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel stride_ori should not less than 2d!";
}
std::vector<int> stride{stride_ori[0], stride_ori[1]};
std::vector<int> dilation{dilation_ori[2], dilation_ori[3]};
dnnl::memory::dims strides{stride_ori[0], stride_ori[1]};
@@ -105,9 +116,8 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool Conv2dGradInputCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kConv2dGradInputInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConv2dGradInputOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr);


+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_



+ 10
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/mkldnn/conv_cpu_kernel.h"
#include <string>
#include <algorithm>
@@ -22,13 +23,17 @@

namespace mindspore {
namespace kernel {
constexpr size_t kConvInputTensorNum = 2;
namespace {
constexpr size_t kConvInputsNum = 2;
constexpr size_t kConvOutputsNum = 1;
constexpr size_t kShapeSize4D = 4;
constexpr size_t kShapeSize5D = 5;
constexpr size_t kKernelStartAxis = 2;
} // namespace

void ConvCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
@@ -59,9 +64,9 @@ void ConvCPUKernel::InitKernel(const CNodePtr &kernel_node) {
auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, stride_attr);
auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, dilation_attr);
(void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
[](const int64_t &value) { return static_cast<int>(value); });
[](const int64_t &value) { return LongToInt(value); });
(void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
[](const int64_t &value) { return static_cast<int>(value); });
[](const int64_t &value) { return LongToInt(value); });
if (stride_ori.size() != src_dim) {
MS_LOG(EXCEPTION) << "Conv stride size must be " << src_dim << "D!";
}
@@ -111,9 +116,8 @@ void ConvCPUKernel::InitKernel(const CNodePtr &kernel_node) {

bool ConvCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < kConvInputTensorNum || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kConvInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConvOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV_CPU_KERNEL_H_

@@ -35,7 +36,6 @@ class ConvCPUKernel : public MKLCPUKernel {

MS_REG_CPU_KERNEL(Conv2D, KernelAttr(), ConvCPUKernel);
MS_REG_CPU_KERNEL(Conv3D, KernelAttr(), ConvCPUKernel);

} // namespace kernel
} // namespace mindspore



+ 15
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.cc View File

@@ -23,8 +23,17 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kAvgPoolingGradInputsNum = 3;
constexpr size_t kkAvgPoolingGradOutputsNum = 1;
constexpr size_t kAvgPoolingGradKernelSize = 4;
constexpr size_t kkAvgPoolingGradStrideSize = 4;
constexpr size_t kkAvgPoolingGradPadSize = 2;
} // namespace

void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
@@ -34,10 +43,10 @@ void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::vector<int64_t> kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KERNEL_SIZE);
std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
(void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(origin_kernel_sizes),
[](const int64_t &value) { return static_cast<int>(value); });
[](const int64_t &value) { return LongToInt(value); });
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides),
[](const int64_t &value) { return static_cast<int>(value); });
if (origin_kernel_sizes.size() != 4 || strides.size() != 4) {
[](const int64_t &value) { return LongToInt(value); });
if (origin_kernel_sizes.size() != kAvgPoolingGradKernelSize || strides.size() != kkAvgPoolingGradStrideSize) {
MS_LOG(EXCEPTION) << "Invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size();
}
std::vector<int> stride{strides[2], strides[3]};
@@ -49,7 +58,7 @@ void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])});
std::vector<int> dummy_dilation{1, 1};
GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r, dummy_dilation);
if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
if (int_padding_l.size() != kkAvgPoolingGradPadSize || int_padding_r.size() != kkAvgPoolingGradPadSize) {
MS_LOG(EXCEPTION) << "Pooling avg get padding failed";
}
dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
@@ -77,9 +86,8 @@ void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool AvgPoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 3 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Pooling avg grad error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kAvgPoolingGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kkAvgPoolingGradOutputsNum, kernel_name_);
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[2]->addr);


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_AVG_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_AVG_GRAD_CPU_KERNEL_H_



+ 15
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.cc View File

@@ -23,8 +23,17 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMaxPoolingGradInputsNum = 3;
constexpr size_t kMaxPoolingGradOutputsNum = 1;
constexpr size_t kMaxPoolingGradKernelSize = 4;
constexpr size_t kMaxPoolingGradStrideSize = 4;
constexpr size_t kMaxPoolingGradInputShapeSize = 4;
} // namespace

void MaxPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<int> kernel_sizes;
@@ -32,10 +41,11 @@ void MaxPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
auto kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KERNEL_SIZE);
auto strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES);
(void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(kernel_sizes),
[](const int64_t &value) { return static_cast<int>(value); });
[](const int64_t &value) { return LongToInt(value); });
(void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides),
[](const int64_t &value) { return static_cast<int>(value); });
if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) {
[](const int64_t &value) { return LongToInt(value); });
if (kernel_sizes.size() != kMaxPoolingGradKernelSize || strides.size() != kMaxPoolingGradStrideSize ||
src_shape_.size() != kMaxPoolingGradInputShapeSize || dst_shape_.size() != kMaxPoolingGradInputShapeSize) {
MS_LOG(EXCEPTION) << "Pooling grad invalid input size!";
}
std::vector<int> padding_r;
@@ -105,9 +115,8 @@ void MaxPoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float
bool MaxPoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 3 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Pooling grad error input output size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaxPoolingGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaxPoolingGradOutputsNum, kernel_name_);

auto input = reinterpret_cast<float *>(inputs[0]->addr);
auto diff = reinterpret_cast<float *>(inputs[2]->addr);


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_MAX_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_MAX_GRAD_CPU_KERNEL_H_



+ 48
- 90
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c View File

@@ -174,35 +174,14 @@
} \
}

#define TRANSPOSE_MULTI_DIMS(TYPE, NAME) \
int Transpose##NAME(const TYPE *in_data, TYPE *out_data, const int *strides, const int *out_strides, \
const int *perm, const int *output_shape, int dims, int *size, int *position) { \
if (size == NULL || position == NULL) { \
return NNACL_ERR; \
} \
*(size + dims - 1) = 1; \
for (int i = dims - 1; i > 0; --i) { \
*(size + i - 1) = *(size + i) * output_shape[i]; \
} \
for (int idx = 0; idx < (*size) * output_shape[0]; ++idx) { \
int pos = idx; \
int output_idx = 0; \
int input_idx = 0; \
for (int i = 0; i < dims; ++i) { \
*(position + i) = pos / *(size + i); \
int out_stride = i < dims - 1 ? out_strides[i] : 1; \
output_idx += (*(position + i) * out_stride); \
input_idx += (*(position + i) * strides[perm[i]]); \
pos -= *(position + i) * (*(size + i)); \
} \
out_data[output_idx] = in_data[input_idx]; \
} \
return NNACL_OK; \
}

#define TRANSPOSE_DIMS(TYPE, NAME) \
void TransposeDims##NAME(const TYPE *in_data, TYPE *out_data, const int *output_shape, \
const TransposeParameter *transpose_param, int task_id, int thread_num) { \
NNACL_CHECK_NULL_RETURN_VOID(in_data); \
NNACL_CHECK_NULL_RETURN_VOID(out_data); \
NNACL_CHECK_NULL_RETURN_VOID(output_shape); \
NNACL_CHECK_NULL_RETURN_VOID(transpose_param); \
NNACL_CHECK_ZERO_RETURN(thread_num); \
const int *perm = transpose_param->perm_; \
const int *strides = transpose_param->strides_; \
const int *out_strides = transpose_param->out_strides_; \
@@ -220,6 +199,7 @@
int output_idx = 0; \
int input_idx = 0; \
for (int i = 0; i < num_axes; ++i) { \
NNACL_CHECK_ZERO_RETURN(*(out_strides + i)); \
int position = pos / *(out_strides + i); \
int out_stride = i < num_axes - 1 ? out_strides[i] : 1; \
output_idx += (position * out_stride); \
@@ -230,69 +210,48 @@
} \
}

#define DOTRANSPOSE(TYPE, NAME) \
int DoTranspose##NAME(const TYPE *in_data, TYPE *out_data, const int *output_shape, \
const TransposeParameter *transpose_param) { \
if (in_data == NULL || out_data == NULL) { \
return NNACL_ERR; \
} \
const int *perm = transpose_param->perm_; \
const int *strides = transpose_param->strides_; \
const int *out_strides = transpose_param->out_strides_; \
int data_size = transpose_param->data_num_ * sizeof(TYPE); \
int num_axes = transpose_param->num_axes_; \
bool needTranspose = false; \
for (int i = 1; i < num_axes; ++i) { \
if (perm[i] - perm[i - 1] != 1) { \
needTranspose = true; \
break; \
} \
} \
if (!needTranspose) { \
(void)memcpy(out_data, in_data, data_size); \
return NNACL_OK; \
} \
for (int i = 0; i < num_axes; ++i) { \
if (perm[i] < 0) { \
return NNACL_PARAM_INVALID; \
} \
} \
if (num_axes == 2) { \
TransposeDim2##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 3) { \
TransposeDim3##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 4) { \
TransposeDim4##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 5) { \
TransposeDim5##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 6) { \
TransposeDim6##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else { \
int *size = (int *)(malloc(num_axes * sizeof(int))); \
if (size == NULL) { \
return NNACL_ERR; \
} \
int *position = (int *)(malloc(num_axes * sizeof(int))); \
if (position == NULL) { \
free(size); \
size = NULL; \
return NNACL_ERR; \
} \
int ret = \
Transpose##NAME(in_data, out_data, strides, out_strides, perm, output_shape, num_axes, size, position); \
if (size != NULL) { \
free(size); \
size = NULL; \
} \
if (position != NULL) { \
free(position); \
position = NULL; \
} \
if (ret != NNACL_OK) { \
return NNACL_ERR; \
} \
} \
return NNACL_OK; \
#define DOTRANSPOSE(TYPE, NAME) \
int DoTranspose##NAME(const TYPE *in_data, TYPE *out_data, const int *output_shape, \
const TransposeParameter *transpose_param) { \
NNACL_CHECK_NULL_RETURN_ERR(in_data); \
NNACL_CHECK_NULL_RETURN_ERR(out_data); \
NNACL_CHECK_NULL_RETURN_ERR(output_shape); \
NNACL_CHECK_NULL_RETURN_ERR(transpose_param); \
const int *perm = transpose_param->perm_; \
const int *strides = transpose_param->strides_; \
const int *out_strides = transpose_param->out_strides_; \
int data_size = transpose_param->data_num_ * sizeof(TYPE); \
int num_axes = transpose_param->num_axes_; \
bool needTranspose = false; \
for (int i = 1; i < num_axes; ++i) { \
if (perm[i] - perm[i - 1] != 1) { \
needTranspose = true; \
break; \
} \
} \
if (!needTranspose) { \
(void)memcpy(out_data, in_data, data_size); \
return NNACL_OK; \
} \
for (int i = 0; i < num_axes; ++i) { \
if (perm[i] < 0) { \
return NNACL_PARAM_INVALID; \
} \
} \
if (num_axes == 2) { \
TransposeDim2##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 3) { \
TransposeDim3##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 4) { \
TransposeDim4##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 5) { \
TransposeDim5##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else if (num_axes == 6) { \
TransposeDim6##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \
} else { \
return NNACL_ERR; \
} \
return NNACL_OK; \
}

#define TRANSPOSE_TEMPLATE(TYPE, NAME) \
@@ -301,7 +260,6 @@
TRANSPOSE_FOUR_DIMS(TYPE, NAME) \
TRANSPOSE_FIVE_DIMS(TYPE, NAME) \
TRANSPOSE_SIX_DIMS(TYPE, NAME) \
TRANSPOSE_MULTI_DIMS(TYPE, NAME) \
TRANSPOSE_DIMS(TYPE, NAME) \
DOTRANSPOSE(TYPE, NAME)



+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unsorted_segment_sum_base.c View File

@@ -19,6 +19,9 @@
#define UNSORTEDSEGMENTSUM(type, type1) \
int UnsortedSegmentSum_##type##_##type1(const type *input, int unit_num, int input_dim1, const type1 *indices, \
type *output, int output_dim0, int output_dim1) { \
NNACL_CHECK_NULL_RETURN_ERR(input); \
NNACL_CHECK_NULL_RETURN_ERR(indices); \
NNACL_CHECK_NULL_RETURN_ERR(output); \
if (input_dim1 == 0) { \
return NNACL_ERR; \
} \


+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.c View File

@@ -17,6 +17,9 @@
#include "nnacl/base/unstack_base.h"

void Unstack(const void *input, void **output, const UnstackParameter *para, int data_size) {
NNACL_CHECK_NULL_RETURN_VOID(input);
NNACL_CHECK_NULL_RETURN_VOID(output);
NNACL_CHECK_NULL_RETURN_VOID(para);
const int8_t *in_addr = (int8_t *)input;
for (int j = 0; j < para->num_; j++) {
int8_t *out_addr = (int8_t *)output[j];


+ 10
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.c View File

@@ -175,6 +175,11 @@ void Fp16TransposeDim6(const float16_t *in_data, float16_t *out_data, const int

void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape,
const TransposeParameter *param, int task_id, int thread_num) {
NNACL_CHECK_NULL_RETURN_VOID(in_data);
NNACL_CHECK_NULL_RETURN_VOID(out_data);
NNACL_CHECK_NULL_RETURN_VOID(output_shape);
NNACL_CHECK_NULL_RETURN_VOID(param);
NNACL_CHECK_ZERO_RETURN(thread_num);
const int *perm = param->perm_;
const int *strides = param->strides_;
const int *out_strides = param->out_strides_;
@@ -192,6 +197,7 @@ void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int
int output_idx = 0;
int input_idx = 0;
for (int i = 0; i < num_axes; ++i) {
NNACL_CHECK_ZERO_RETURN(*(out_strides + i));
int position = pos / *(out_strides + i);
int out_stride = i < num_axes - 1 ? out_strides[i] : 1;
output_idx += (position * out_stride);
@@ -204,9 +210,10 @@ void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int

int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape,
const TransposeParameter *param) {
if (in_data == NULL || out_data == NULL) {
return NNACL_ERR;
}
NNACL_CHECK_NULL_RETURN_ERR(in_data);
NNACL_CHECK_NULL_RETURN_ERR(out_data);
NNACL_CHECK_NULL_RETURN_ERR(output_shape);
NNACL_CHECK_NULL_RETURN_ERR(param);
const int *perm = param->perm_;
const int *strides = param->strides_;
const int *out_strides = param->out_strides_;


+ 9
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c View File

@@ -173,9 +173,11 @@ void TransposeDim6Fp32(const float *in_data, float *out_data, const int *strides

void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_shape,
const TransposeParameter *transpose_param, int task_id, int thread_num) {
if (thread_num == 0) {
return;
}
NNACL_CHECK_NULL_RETURN_VOID(in_data);
NNACL_CHECK_NULL_RETURN_VOID(out_data);
NNACL_CHECK_NULL_RETURN_VOID(output_shape);
NNACL_CHECK_NULL_RETURN_VOID(transpose_param);
NNACL_CHECK_ZERO_RETURN(thread_num);
int *perm = (int *)(transpose_param->perm_);
int *strides = (int *)(transpose_param->strides_);
int *out_strides = (int *)(transpose_param->out_strides_);
@@ -206,9 +208,10 @@ void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_

int DoTransposeFp32(const float *in_data, float *out_data, const int *output_shape,
const TransposeParameter *transpose_param) {
if (in_data == NULL || out_data == NULL) {
return NNACL_ERR;
}
NNACL_CHECK_NULL_RETURN_ERR(in_data);
NNACL_CHECK_NULL_RETURN_ERR(out_data);
NNACL_CHECK_NULL_RETURN_ERR(output_shape);
NNACL_CHECK_NULL_RETURN_ERR(transpose_param);
int *perm = (int *)(transpose_param->perm_);
int *strides = (int *)(transpose_param->strides_);
int *out_strides = (int *)(transpose_param->out_strides_);


+ 10
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c View File

@@ -174,9 +174,10 @@ void TransposeDim6Int8(const int8_t *in_data, int8_t *out_data, const int *strid

int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
const TransposeParameter *transpose_param) {
if (in_data == NULL || out_data == NULL) {
return NNACL_NULL_PTR;
}
NNACL_CHECK_NULL_RETURN_ERR(in_data);
NNACL_CHECK_NULL_RETURN_ERR(out_data);
NNACL_CHECK_NULL_RETURN_ERR(output_shape);
NNACL_CHECK_NULL_RETURN_ERR(transpose_param);

const int *perm = transpose_param->perm_;
const int *strides = transpose_param->strides_;
@@ -222,6 +223,11 @@ int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_s

void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
const TransposeParameter *transpose_param, int task_id, int thread_num) {
NNACL_CHECK_NULL_RETURN_VOID(in_data);
NNACL_CHECK_NULL_RETURN_VOID(out_data);
NNACL_CHECK_NULL_RETURN_VOID(output_shape);
NNACL_CHECK_NULL_RETURN_VOID(transpose_param);
NNACL_CHECK_ZERO_RETURN(thread_num);
const int *perm = transpose_param->perm_;
const int *strides = transpose_param->strides_;
const int *out_strides = transpose_param->out_strides_;
@@ -239,6 +245,7 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu
int output_idx = 0;
int input_idx = 0;
for (int i = 0; i < num_axes; ++i) {
NNACL_CHECK_ZERO_RETURN(*(out_strides + i));
int position = pos / *(out_strides + i);
int out_stride = i < num_axes - 1 ? out_strides[i] : 1;
output_idx += (position * out_stride);


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/print_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/print_cpu_kernel.h"
#include <algorithm>
#include "ir/tensor.h"
@@ -24,6 +25,7 @@ namespace mindspore {
namespace kernel {
template <typename T>
void PrintCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_tensor_num = AnfAlgo::GetInputTensorNum(kernel_node);
for (size_t i = 0; i < input_tensor_num; ++i) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
@@ -51,7 +53,7 @@ bool PrintCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
} else {
ShapeVector shape;
(void)std::transform(input_shapes_[i].begin(), input_shapes_[i].end(), std::back_inserter(shape),
[](const size_t &value) { return static_cast<int64_t>(value); });
[](const size_t &value) { return SizeToLong(value); });
Tensor tensor(data_type, shape, inputs[i]->addr, input_sizes_[i] * sizeof(T));
std::cout << tensor.ToStringNoLimit() << std::endl;
}


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/print_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PRINT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PRINT_CPU_KERNEL_H_

#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"


+ 12
- 14
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc View File

@@ -23,7 +23,11 @@
namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kEmbeddingLookUpProxyInputsNum = 2;
constexpr size_t kEmbeddingLookUpProxyOutputsNum = 1;

void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
EmbeddingLookUpCPUKernel::InitKernel(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
@@ -46,12 +50,12 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
key_ = AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
}
std::vector<float> values;
std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
std::transform(indices_shape.begin(), indices_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
(void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
(void)std::transform(indices_shape.begin(), indices_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
(void)std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape
<< ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
std::vector<int64_t> lens{SizeToLong(input_shape.size()), SizeToLong(indices_shape.size()),
@@ -66,12 +70,8 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 2) {
MS_LOG(EXCEPTION) << "Inputs size is " << inputs.size() << ", but EmbeddingLookUpProxyKernel needs 2.";
}
if (outputs.size() != 1) {
MS_LOG(EXCEPTION) << "Outputs size is " << outputs.size() << ", but EmbeddingLookUpProxyKernel needs 1.";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookUpProxyInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookUpProxyOutputsNum, kernel_name_);
auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t input_size = inputs[1]->size;
@@ -84,7 +84,6 @@ bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &i
auto ret = memcpy_s(lookup_ids.data(), lookup_ids.size() * sizeof(int), indices_addr, input_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
return false;
}
mindspore::ps::Worker::GetInstance().DoPSEmbeddingLookup(key_, lookup_ids, &lookup_result,
mindspore::ps::kEmbeddingLookupCmd);
@@ -92,7 +91,6 @@ bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &i
auto ret2 = memcpy_s(output_addr, outputs[0]->size, lookup_result.data(), output_size);
if (ret2 != EOK) {
MS_LOG(EXCEPTION) << "Lookup result memcpy failed.";
return false;
}
return true;
}


+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_



+ 6
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc View File

@@ -93,15 +93,14 @@ void EmbeddingLookUpPSKernel::UpdateEmbeddings(float *embedding_table, const siz
size_t dest_len = copy_len;
for (size_t i = 0; i < ids_size; ++i) {
int index = SizeToInt(lookup_ids[i]) - LongToInt(offset_);
if (index >= 0 && index < SizeToInt(first_dim_size_)) {
auto ret = memcpy_s(embedding_table + IntToSize(index) * outer_dim_size_, dest_len,
update_vals + i * outer_dim_size_, copy_len);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
}
} else {
if (index < 0 || index >= SizeToInt(first_dim_size_)) {
MS_LOG(EXCEPTION) << "UpdateEmbeddings index invalid.";
}
auto ret = memcpy_s(embedding_table + IntToSize(index) * outer_dim_size_, dest_len,
update_vals + i * outer_dim_size_, copy_len);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
}
}
}



+ 2
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_



+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pull_kernel.h View File

@@ -44,6 +44,7 @@ class PullKernel : public CPUKernel {
return true;
}
void Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but pull needs 2 inputs.";


+ 0
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/push_kernel.h View File

@@ -49,7 +49,6 @@ class PushKernel : public CPUKernel {
auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, &key_, sizeof(size_t));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
return false;
}
return true;
}


+ 3
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.cc View File

@@ -27,6 +27,7 @@ constexpr size_t kSparseApplyAdamPSInputsShapeSize = 11;

void SparseApplyAdamPSKernel::InitKernel(
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(shapes);
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
if (shape_vec.size() < kSparseApplyAdamPSInputsShapeSize) {
@@ -68,7 +69,7 @@ void SparseApplyAdamPSKernel::InitKernel(
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov");
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV);
}
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
@@ -79,7 +80,7 @@ void SparseApplyAdamPSKernel::InitKernel(

void SparseApplyAdamPSKernel::ReInit(const std::vector<std::vector<size_t>> &shapes) {
if (shapes.empty() || shapes[0].empty()) {
MS_LOG(EXCEPTION) << "Shape should not empty";
MS_LOG(EXCEPTION) << "Shape is empty";
}
const std::vector<size_t> &indices_shape = shapes[0];
indices_size_ = indices_shape[0];


+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.cc View File

@@ -24,6 +24,7 @@ constexpr size_t kSparseApplyFtrlPSInputSize = 5;

void SparseApplyFtrlPSKernel::InitKernel(
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(shapes);
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
if (shape_vec.size() < kSparseApplyFtrlPSInputSize) {


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h View File

@@ -46,7 +46,7 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK

protected:
void ReInit(const std::vector<AddressPtr> &) override;
float init_accum_;
float init_accum_{0.1};
};
} // namespace ps
} // namespace kernel


+ 9
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/sparse_apply_lazy_adam_ps_kernel.cc View File

@@ -23,14 +23,15 @@
namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyLazyAdamPSInputSize = 5;
constexpr size_t kSparseApplyLazyAdamPSInputsSize = 11;

void SparseApplyLazyAdamPSKernel::InitKernel(
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(shapes);
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
if (shape_vec.size() < kSparseApplyLazyAdamPSInputSize) {
MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernel needs " << kSparseApplyLazyAdamPSInputSize
if (shape_vec.size() < kSparseApplyLazyAdamPSInputsSize) {
MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernel needs " << kSparseApplyLazyAdamPSInputsSize
<< " input shapes, but got " << shape_vec.size();
}
std::vector<size_t> &var_shape = *(shape_vec[0]);
@@ -70,7 +71,7 @@ void SparseApplyLazyAdamPSKernel::InitKernel(
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov");
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV);
}
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
@@ -89,6 +90,10 @@ void SparseApplyLazyAdamPSKernel::ReInit(const std::vector<std::vector<size_t>>
}

void SparseApplyLazyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
if (inputs.size() < kSparseApplyLazyAdamPSInputsSize) {
MS_LOG(EXCEPTION) << "Input shape size should not less than " << kSparseApplyLazyAdamPSInputsSize << ", but got "
<< inputs.size();
}
const auto &indices_addr = inputs[10];
indices_size_ = indices_addr->size / sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;


+ 15
- 27
mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.cc View File

@@ -20,6 +20,13 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kUniformIntInputsNum = 3;
constexpr size_t kUniformRealInputsNum = 1;
constexpr size_t kUniformIntOutputsNum = 1;
constexpr size_t kUniformRealOutputsNum = 1;
constexpr size_t kStandardNormalOutputsNum = 1;
} // namespace
void StandardNormal(float *output, std::normal_distribution<float> distribution,
std::default_random_engine random_generator, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
@@ -60,12 +67,6 @@ void LaunchStandardNormal(unsigned int seed, const std::vector<AddressPtr> &outp

void LaunchUniformInt(unsigned int seed, const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 3) {
MS_LOG(EXCEPTION) << "Expect input number 3, actual got input number " << inputs.size();
}
if (outputs.size() != 1) {
MS_LOG(EXCEPTION) << "Expect output number 1, actual got output number " << outputs.size();
}
// Init min/max values.
int min_val = reinterpret_cast<int *>(inputs[1]->addr)[0];
int max_val = reinterpret_cast<int *>(inputs[2]->addr)[0];
@@ -75,7 +76,6 @@ void LaunchUniformInt(unsigned int seed, const std::vector<AddressPtr> &inputs,

// Init output address.
auto output = reinterpret_cast<int *>(outputs[0]->addr);
MS_EXCEPTION_IF_NULL(output);

// Init sample number.
size_t num_sample = outputs[0]->size / sizeof(int);
@@ -92,15 +92,8 @@ void LaunchUniformInt(unsigned int seed, const std::vector<AddressPtr> &inputs,

void LaunchUniformReal(unsigned int seed, const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 1) {
MS_LOG(EXCEPTION) << "Expect input number 1, actual got input number " << inputs.size();
}
if (outputs.size() != 1) {
MS_LOG(EXCEPTION) << "Expect output number 1, actual got output number " << outputs.size();
}
// Init output address.
auto output = reinterpret_cast<float *>(outputs[0]->addr);
MS_EXCEPTION_IF_NULL(output);

// Init sample number.
size_t num_sample = outputs[0]->size / sizeof(int);
@@ -117,24 +110,14 @@ void LaunchUniformReal(unsigned int seed, const std::vector<AddressPtr> &inputs,

void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kRandomOpTypeMap.find(kernel_name);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kRandomOpTypeMap.find(kernel_name_);
if (iter == kRandomOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported.";
MS_LOG(EXCEPTION) << "Random operation " << kernel_name_ << " is not supported.";
} else {
random_op_type_ = iter->second;
}

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if ((random_op_type_ == RANDOM_OP_NORMAL) && input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but random op needs 1 input.";
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but random op needs 1 output.";
}

seed_ = LongToInt(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed")));
seed2_ = LongToInt(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2")));
}
@@ -152,10 +135,15 @@ bool RandomCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, cons
}

if (random_op_type_ == RANDOM_OP_NORMAL) {
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kStandardNormalOutputsNum, kernel_name_);
LaunchStandardNormal(RNG_seed, outputs);
} else if (random_op_type_ == RANDOM_OP_UNIFORM_INT) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kUniformIntInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kUniformIntOutputsNum, kernel_name_);
LaunchUniformInt(RNG_seed, inputs, outputs);
} else if (random_op_type_ == RANDOM_OP_UNIFORM_REAL) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kUniformRealInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kUniformRealOutputsNum, kernel_name_);
LaunchUniformReal(RNG_seed, inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported.";


+ 2
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.h View File

@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CPU_KERNEL_H_

#include <vector>
#include <string>
#include <map>


+ 8
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_interface.h"
@@ -22,12 +23,15 @@ namespace mindspore {
namespace kernel {
namespace {
constexpr auto kRanksGroup = "group";
constexpr size_t kReduceScatterInputsNum = 1;
constexpr size_t kReduceScatterOutputsNum = 1;
} // namespace

ReduceScatterCPUKernel::ReduceScatterCPUKernel() : op_type_(kMPIOpTypeSum) {}

void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto primitive = AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(primitive);
auto op = primitive->GetAttr("op");
@@ -46,8 +50,10 @@ void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool ReduceScatterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReduceScatterInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReduceScatterOutputsNum, kernel_name_);
auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto output_data_num = outputs[0]->size / sizeof(float);
return MPIReduceScatter(input_addr, output_addr, ranks_group_, output_data_num, op_type_);
}


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_SCATTER_CPU_KERNEL_H_

#include <vector>
#include <string>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"


+ 14
- 10
mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.cc View File

@@ -19,37 +19,41 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kReshapeInputsNum = 1;
constexpr size_t kReshapeOutputsNum = 1;
} // namespace

void ReshapeCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
node_wpt_ = kernel_node;
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
x_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
type_size_ = GetTypeByte(TypeIdToType(x_data_type_));
}

bool ReshapeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto node_ = node_wpt_.lock();
if (!node_) {
MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
}
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0);
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "Input or output empty!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReshapeInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReshapeOutputsNum, kernel_name_);
if (inputs[0]->size != outputs[0]->size) {
return false;
}

if (inputs[0]->addr == outputs[0]->addr) {
return true;
}

auto node = node_wpt_.lock();
if (!node) {
MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
}
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
size_t mem_bits = type_size_;
for (size_t i = 0; i < x_shape.size(); ++i) {
mem_bits *= x_shape[i];
}
auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits);
if (ret != 0) {
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret;
}
return true;


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2021-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.


+ 15
- 24
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.cc View File

@@ -21,24 +21,26 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kResizeBilinearInputSize = 4;
constexpr size_t kResizeBilinearInputsNum = 1;
constexpr size_t kResizeBilinearOutputsNum = 1;
constexpr size_t kResizeBilinearInputsShapeSize = 4;
constexpr size_t kResizeBilinearAttrSize = 2;
} // namespace

void ResizeBilinearCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
size_ = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (shape_.size() < kResizeBilinearInputSize) {
MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeBilinearInputSize << ", but got " << shape_.size();
if (shape_.size() != kResizeBilinearInputsShapeSize) {
MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeBilinearInputsShapeSize << ", but got "
<< shape_.size();
}

if (size_.size() < kResizeBilinearAttrSize) {
MS_LOG(EXCEPTION) << "Attr SIZE shape size should be " << kResizeBilinearAttrSize << ", but got " << size_.size();
if (size_.size() != kResizeBilinearAttrSize) {
MS_LOG(EXCEPTION) << "Size attr requires " << kResizeBilinearAttrSize << " elements, but got " << size_.size();
}

size_t in_height = shape_[2];
size_t in_width = shape_[3];
size_t out_height = size_[0];
@@ -50,6 +52,8 @@ void ResizeBilinearCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool ResizeBilinearCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeBilinearInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeBilinearOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16, float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -62,10 +66,9 @@ bool ResizeBilinearCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu

template <typename T1, typename T2>
void ResizeBilinearCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<T1 *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T2 *>(outputs[0]->addr);

const std::vector<AddressPtr> &outputs) const {
const auto *input_addr = reinterpret_cast<T1 *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T2 *>(outputs[0]->addr);
size_t batch_size = shape_[0];
size_t channel = shape_[1];
size_t in_height = shape_[2];
@@ -84,7 +87,6 @@ void ResizeBilinearCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs

std::vector<CachedInterpolation> ys(out_height + 1);
std::vector<CachedInterpolation> xs(out_width + 1);

ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data());
ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data());

@@ -111,16 +113,5 @@ void ResizeBilinearCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs
}
}
}

void ResizeBilinearCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinear needs 1 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinear expects 1 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 2
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.h View File

@@ -36,11 +36,10 @@ class ResizeBilinearCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T1, typename T2>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const;

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
bool align_corners_{false};
float height_scale{1.0};


+ 16
- 24
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.cc View File

@@ -21,23 +21,25 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kResizeBilinearGradInput0Size = 4;
constexpr size_t kResizeBilinearGradInput1Size = 4;
constexpr size_t kResizeBilinearGradInputsNum = 2;
constexpr size_t kResizeBilinearGradOutputNum = 1;
constexpr size_t kResizeBilinearGradInputsDoutShapeSize = 4;
constexpr size_t kResizeBilinearGradInputsXShapeSize = 4;
} // namespace

void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
size_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (shape_.size() < kResizeBilinearGradInput0Size) {
MS_LOG(EXCEPTION) << "Input_0 shape size should be " << kResizeBilinearGradInput0Size << ", but got "
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
if (shape_.size() < kResizeBilinearGradInputsDoutShapeSize) {
MS_LOG(EXCEPTION) << "Input dout shape should be " << kResizeBilinearGradInputsDoutShapeSize << ", but got "
<< shape_.size();
}

if (size_.size() < kResizeBilinearGradInput1Size) {
MS_LOG(EXCEPTION) << "Input_1 shape size should be " << kResizeBilinearGradInput1Size << ", but got "
if (size_.size() < kResizeBilinearGradInputsXShapeSize) {
MS_LOG(EXCEPTION) << "Input x shape should be " << kResizeBilinearGradInputsXShapeSize << ", but got "
<< size_.size();
}

@@ -45,7 +47,6 @@ void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
size_t in_width = shape_[3];
size_t out_height = size_[2];
size_t out_width = size_[3];

height_scale = Scaling(out_height, in_height, align_corners_);
width_scale = Scaling(out_width, in_width, align_corners_);
}
@@ -53,6 +54,8 @@ void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool ResizeBilinearGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeBilinearGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeBilinearGradOutputNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -65,9 +68,9 @@ bool ResizeBilinearGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &

template <typename T>
void ResizeBilinearGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto dloss_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
const std::vector<AddressPtr> &outputs) const {
const auto *dloss_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);

auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size);
if (ret != EOK) {
@@ -111,16 +114,5 @@ void ResizeBilinearGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &in
}
}
}

void ResizeBilinearGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "ResizeBilinearGrad needs 2 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinear Gradexpects 1 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 5
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.h View File

@@ -36,15 +36,14 @@ class ResizeBilinearGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const;

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
bool align_corners_ = false;
float height_scale = 1.;
float width_scale = 1.;
bool align_corners_{false};
float height_scale{1.0};
float width_scale{1.0};
std::vector<size_t> size_;
std::vector<size_t> shape_;
};


+ 15
- 22
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,24 +21,26 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kResizeNearestNeighborInputSize = 4;
constexpr size_t kResizeNearestNeighborOutputSize = 2;
constexpr size_t kResizeNearestNeighborInputsNum = 1;
constexpr size_t kResizeNearestNeighborOutputNum = 1;
constexpr size_t kResizeNearestNeighborInputsShapeSize = 4;
constexpr size_t kResizeNearestNeighborAttrSize = 2;
} // namespace

void ResizeNearestNeighborCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<int64_t> output_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE);
align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (input_shape.size() < kResizeNearestNeighborInputSize) {
MS_LOG(EXCEPTION) << "Input_0 shape size should be " << kResizeNearestNeighborInputSize << ", but got "
if (input_shape.size() != kResizeNearestNeighborInputsShapeSize) {
MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeNearestNeighborInputsShapeSize << ", but got "
<< input_shape.size();
}

if (output_size.size() < kResizeNearestNeighborOutputSize) {
MS_LOG(EXCEPTION) << "Output shape size should be " << kResizeNearestNeighborOutputSize << ", but got "
<< output_size.size();
if (output_size.size() != kResizeNearestNeighborAttrSize) {
MS_LOG(EXCEPTION) << "Size attr should be " << kResizeNearestNeighborAttrSize << ", but got " << output_size.size();
}

batch_size_ = input_shape[0];
@@ -55,6 +57,8 @@ void ResizeNearestNeighborCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool ResizeNearestNeighborCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeNearestNeighborInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeNearestNeighborOutputNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -74,8 +78,8 @@ bool ResizeNearestNeighborCPUKernel::Launch(const std::vector<kernel::AddressPtr
template <typename T>
void ResizeNearestNeighborCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);

if (out_height_ == in_height_ && out_width_ == in_width_) {
for (size_t i = 0; i < output_size_; ++i) {
@@ -99,16 +103,5 @@ void ResizeNearestNeighborCPUKernel::LaunchKernel(const std::vector<AddressPtr>
output_addr[i] = input_addr[input_pos];
}
}

void ResizeNearestNeighborCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinear needs 1 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinear expects 1 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 1
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.h View File

@@ -36,11 +36,10 @@ class ResizeNearestNeighborCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
bool align_corners_{false};
size_t batch_size_{0};


+ 17
- 24
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,23 +21,27 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kResizeNearestNeighborGradInputSize = 4;
constexpr size_t kResizeNearestNeighborGradOutputSize = 4;
constexpr size_t kResizeNearestNeighborGradInputsNum = 1;
constexpr size_t kResizeNearestNeighborGradOutputNum = 1;
constexpr size_t kResizeNearestNeighborGradInputsShapeSize = 4;
constexpr size_t kResizeNearestNeighborGradOutputsShapeSize = 4;
} // namespace

void ResizeNearestNeighborGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> output_size = AnfAlgo::GetOutputInferShape(kernel_node, 0);
align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners");
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
if (input_shape.size() < kResizeNearestNeighborGradInputSize) {
MS_LOG(EXCEPTION) << "Input_0 shape size should be " << kResizeNearestNeighborGradInputSize << ", but got "
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);

if (input_shape.size() != kResizeNearestNeighborGradInputsShapeSize) {
MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeNearestNeighborGradInputsShapeSize << ", but got "
<< input_shape.size();
}

if (output_size.size() < kResizeNearestNeighborGradOutputSize) {
MS_LOG(EXCEPTION) << "Output shape size should be " << kResizeNearestNeighborGradOutputSize << ", but got "
if (output_size.size() != kResizeNearestNeighborGradOutputsShapeSize) {
MS_LOG(EXCEPTION) << "Output shape size should be " << kResizeNearestNeighborGradOutputsShapeSize << ", but got "
<< output_size.size();
}

@@ -54,6 +58,8 @@ void ResizeNearestNeighborGradCPUKernel::InitKernel(const CNodePtr &kernel_node)
bool ResizeNearestNeighborGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeNearestNeighborGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeNearestNeighborGradOutputNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -73,9 +79,8 @@ bool ResizeNearestNeighborGradCPUKernel::Launch(const std::vector<kernel::Addres
template <typename T>
void ResizeNearestNeighborGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto dloss_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);

const auto *dloss_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Output buffer memset failed, ret:" << ret;
@@ -83,7 +88,6 @@ void ResizeNearestNeighborGradCPUKernel::LaunchKernel(const std::vector<AddressP

size_t in_hw_size = in_width_ * in_height_;
size_t out_hw_size = out_width_ * out_height_;

for (size_t b = 0; b < batch_size_; ++b) {
for (size_t c = 0; c < channel_; ++c) {
for (size_t h = 0; h < in_height_; ++h) {
@@ -102,16 +106,5 @@ void ResizeNearestNeighborGradCPUKernel::LaunchKernel(const std::vector<AddressP
}
}
}

void ResizeNearestNeighborGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinearGrad needs 1 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "ResizeBilinear Gradexpects 1 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 1
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.h View File

@@ -36,11 +36,10 @@ class ResizeNearestNeighborGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
bool align_corners_{false};
size_t batch_size_{0};


+ 8
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/rmsprop_cpu_kernel.cc View File

@@ -21,6 +21,11 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kCenteredRMSPropInputsNum = 9;
constexpr size_t kRMSPropInputsNum = 5;
} // namespace

template <typename T>
void RMSPropCPUKernel<T>::LaunchRMSPropUnuseCenter(T *variable, T *mean_square, T *moment, T *gradients,
float *learning_rate) {
@@ -71,6 +76,7 @@ void RMSPropCPUKernel<T>::LaunchRMSPropUseCenter(T *variable, T *mean_square, T
template <typename T>
void RMSPropCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto node_name = AnfAlgo::GetCNodeName(kernel_node);
if (node_name == "ApplyCenteredRMSProp") {
use_center_ = true;
@@ -92,6 +98,7 @@ template <typename T>
bool RMSPropCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &) {
if (!use_center_) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kCenteredRMSPropInputsNum, kernel_name_);
float *variable = reinterpret_cast<float *>(inputs[0]->addr);
float *mean_square = reinterpret_cast<float *>(inputs[1]->addr);
float *moment = reinterpret_cast<float *>(inputs[2]->addr);
@@ -102,6 +109,7 @@ bool RMSPropCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
MS_LOG(INFO) << "RMSPropCPUKernel lens:" << lens << " size_:" << size_;
LaunchRMSPropUnuseCenter(variable, mean_square, moment, gradients, learning_rate);
} else {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kRMSPropInputsNum, kernel_name_);
T *variable = reinterpret_cast<float *>(inputs[0]->addr);
T *mean_gradients = reinterpret_cast<float *>(inputs[1]->addr);
T *mean_square = reinterpret_cast<float *>(inputs[2]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/rmsprop_cpu_kernel.h View File

@@ -27,7 +27,7 @@ namespace kernel {
template <typename T>
class RMSPropCPUKernel : public CPUKernel {
public:
RMSPropCPUKernel() {}
RMSPropCPUKernel() = default;
~RMSPropCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;


+ 55
- 58
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.cc View File

@@ -22,15 +22,34 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kInputNum = 3;
constexpr size_t kOutputNum = 1;
constexpr size_t kScatterArithmeticInputsNum = 3;
constexpr size_t kScatterArithmeticOutputsNum = 1;
} // namespace

template <typename T>
void ScatterArithmeticCPUKernel<T>::InitComputeFunc() {
static const std::map<std::string, TypeComputeFunc> scatterArithmeticFuncMap{
{prim::kPrimScatterAdd->name(), &ScatterArithmeticCPUKernel<T>::ScatterAdd},
{prim::kPrimScatterSub->name(), &ScatterArithmeticCPUKernel<T>::ScatterSub},
{prim::kPrimScatterMul->name(), &ScatterArithmeticCPUKernel<T>::ScatterMul},
{prim::kPrimScatterDiv->name(), &ScatterArithmeticCPUKernel<T>::ScatterDiv},
{prim::kPrimScatterMax->name(), &ScatterArithmeticCPUKernel<T>::ScatterMax},
{prim::kPrimScatterMin->name(), &ScatterArithmeticCPUKernel<T>::ScatterMin},
{prim::kPrimScatterUpdate->name(), &ScatterArithmeticCPUKernel<T>::ScatterUpdate}};
if (scatterArithmeticFuncMap.find(kernel_name_) == scatterArithmeticFuncMap.end()) {
MS_LOG(EXCEPTION) << "ScatterArithmeticCPUKernel does not support " << kernel_name_;
}
compute_func_ = scatterArithmeticFuncMap.at(kernel_name_);
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() < 1) {
MS_LOG(EXCEPTION) << "Input shape size should not less than 1";
}
input_size_ = 1;
inner_size_ = 1;
if (input_shape.empty()) {
@@ -46,52 +65,30 @@ void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
for (size_t i = 0; i < indices_shape.size(); i++) {
indices_size_ *= indices_shape[i];
}
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) const {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != kInputNum) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterAdd needs 3 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != kOutputNum) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterAdd has 1 output.";
}
InitComputeFunc();
}

template <typename T>
bool ScatterArithmeticCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
static const std::map<std::string, std::function<void(ScatterArithmeticCPUKernel *, T *, const int *, const T *)>>
kScatterArithmeticBinOpFuncMap{{"ScatterAdd", &ScatterArithmeticCPUKernel<T>::ScatterAdd},
{"ScatterSub", &ScatterArithmeticCPUKernel<T>::ScatterSub},
{"ScatterMul", &ScatterArithmeticCPUKernel<T>::ScatterMul},
{"ScatterDiv", &ScatterArithmeticCPUKernel<T>::ScatterDiv},
{"ScatterMax", &ScatterArithmeticCPUKernel<T>::ScatterMax},
{"ScatterMin", &ScatterArithmeticCPUKernel<T>::ScatterMin},
{"ScatterUpdate", &ScatterArithmeticCPUKernel<T>::ScatterUpdate}};
if (kScatterArithmeticBinOpFuncMap.find(kernel_name_) != kScatterArithmeticBinOpFuncMap.end()) {
T *input = reinterpret_cast<T *>(inputs[INPUT]->addr);
int *indices = reinterpret_cast<int *>(inputs[INDICES]->addr);
T *updates = reinterpret_cast<T *>(inputs[UPDATES]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
kScatterArithmeticBinOpFuncMap.at(kernel_name_)(this, input, indices, updates);
auto bufferSize = outputs[0]->size;
auto ret = memcpy_s(output, bufferSize, input, input_size_ * sizeof(T));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Memory copy failed!";
}
} else {
MS_LOG(EXCEPTION) << "Not support operator:" << kernel_name_;
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kScatterArithmeticInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kScatterArithmeticOutputsNum, kernel_name_);
auto *input = reinterpret_cast<T *>(inputs[INPUT_INDEX_]->addr);
auto *indices = reinterpret_cast<int *>(inputs[INDICES_INDEX_]->addr);
auto *updates = reinterpret_cast<T *>(inputs[UPDATES_INDEX_]->addr);
auto *output = reinterpret_cast<T *>(outputs[OUTPUT_INDEX_]->addr);
compute_func_(this, input, indices, updates);
auto bufferSize = outputs[OUTPUT_INDEX_]->size;
auto ret = memcpy_s(output, bufferSize, input, input_size_ * sizeof(T));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Memory copy failed!";
}
return true;
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
auto base_index_updates = i * inner_size_;
auto base_index_input = indices[i] * inner_size_;
@@ -102,7 +99,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, con
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
auto base_index_updates = i * inner_size_;
auto base_index_input = indices[i] * inner_size_;
@@ -113,7 +110,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, con
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
auto base_index_updates = i * inner_size_;
auto base_index_input = indices[i] * inner_size_;
@@ -124,32 +121,32 @@ void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, con
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterDiv(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterDiv(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
for (size_t j = 0; j < inner_size_; j++) {
auto dividend = input[indices[i] * inner_size_ + j];
auto divisor = updates[i * inner_size_ + j];
if (divisor == 0) {
if (dividend == 0) {
input[indices[i] * inner_size_ + j] = std::numeric_limits<T>::quiet_NaN();
continue;
}
if (std::numeric_limits<T>::has_infinity) {
input[indices[i] * inner_size_ + j] =
dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
} else {
input[indices[i] * inner_size_ + j] =
dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
}
if (divisor != 0) {
input[indices[i] * inner_size_ + j] = dividend / divisor;
continue;
}
if (dividend == 0) {
input[indices[i] * inner_size_ + j] = std::numeric_limits<T>::quiet_NaN();
continue;
}
input[indices[i] * inner_size_ + j] = dividend / divisor;
if (std::numeric_limits<T>::has_infinity) {
input[indices[i] * inner_size_ + j] =
dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
} else {
input[indices[i] * inner_size_ + j] =
dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
}
}
}
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
auto base_index_updates = i * inner_size_;
auto base_index_input = indices[i] * inner_size_;
@@ -162,7 +159,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, con
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
auto base_index_updates = i * inner_size_;
auto base_index_input = indices[i] * inner_size_;
@@ -175,7 +172,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, con
}

template <typename T>
void ScatterArithmeticCPUKernel<T>::ScatterUpdate(T *input, const int *indices, const T *updates) {
void ScatterArithmeticCPUKernel<T>::ScatterUpdate(T *input, const int *indices, const T *updates) const {
for (size_t i = 0; i < indices_size_; i++) {
auto base_index_updates = i * inner_size_;
auto base_index_input = indices[i] * inner_size_;


+ 19
- 21
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h View File

@@ -37,27 +37,25 @@ class ScatterArithmeticCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node) const;

void ScatterAdd(T *input, const int *indices, const T *updates);

void ScatterSub(T *input, const int *indices, const T *updates);

void ScatterMul(T *input, const int *indices, const T *updates);

void ScatterDiv(T *input, const int *indices, const T *updates);

void ScatterMax(T *input, const int *indices, const T *updates);

void ScatterMin(T *input, const int *indices, const T *updates);

void ScatterUpdate(T *input, const int *indices, const T *updates);

size_t input_size_{1};
size_t inner_size_{1};
size_t indices_size_{1};
std::string kernel_name_;
enum input_list_ { INPUT, INDICES, UPDATES };
void InitComputeFunc();
void ScatterAdd(T *input, const int *indices, const T *updates) const;
void ScatterSub(T *input, const int *indices, const T *updates) const;
void ScatterMul(T *input, const int *indices, const T *updates) const;
void ScatterDiv(T *input, const int *indices, const T *updates) const;
void ScatterMax(T *input, const int *indices, const T *updates) const;
void ScatterMin(T *input, const int *indices, const T *updates) const;
void ScatterUpdate(T *input, const int *indices, const T *updates) const;

using TypeComputeFunc = std::function<void(ScatterArithmeticCPUKernel *, T *, const int *, const T *)>;

TypeComputeFunc compute_func_;
size_t input_size_{0};
size_t inner_size_{0};
size_t indices_size_{0};
const size_t INPUT_INDEX_{0};
const size_t INDICES_INDEX_{1};
const size_t UPDATES_INDEX_{2};
const size_t OUTPUT_INDEX_{0};
};

MS_REG_CPU_KERNEL_T(ScatterAdd,


+ 10
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc View File

@@ -22,14 +22,21 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kScatterNdUpdateInputsNum = 3;
constexpr size_t kScatterNdUpdateOutputsNum = 1;
constexpr size_t kMinIndiceRank = 2;

template <typename T>
void Compute(const ComputeParams<T> *params, const size_t start, const size_t end) {
MS_EXCEPTION_IF_NULL(params);
T *x = params->x_;
int *indices = params->indices_;
T *updates = params->updates_;
std::vector<int> *out_strides = params->out_strides_;
MS_EXCEPTION_IF_NULL(x);
MS_EXCEPTION_IF_NULL(indices);
MS_EXCEPTION_IF_NULL(updates);
MS_EXCEPTION_IF_NULL(out_strides);

for (int i = SizeToInt(start); i < SizeToInt(end); ++i) {
int offset = 0;
@@ -51,7 +58,7 @@ void Compute(const ComputeParams<T> *params, const size_t start, const size_t en

void ScatterNdUpdateCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
Check(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto updates_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
@@ -93,6 +100,8 @@ void ScatterNdUpdateCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool ScatterNdUpdateCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kScatterNdUpdateInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kScatterNdUpdateOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -136,16 +145,5 @@ void ScatterNdUpdateCPUKernel::LaunchKernel(const std::vector<AddressPtr> &input
MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret;
}
}

void ScatterNdUpdateCPUKernel::Check(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterNdUpdate needs 3 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterNdUpdate needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 1
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.h View File

@@ -46,11 +46,10 @@ class ScatterNdUpdateCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);

private:
void Check(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
int unit_size_{0};
size_t num_units_{0};


+ 5
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc View File

@@ -23,13 +23,14 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kInputSize = 2;
constexpr size_t kOutputSize = 1;
constexpr size_t kSearchSortedInputsNum = 2;
constexpr size_t kSearchSortedOutputsNum = 1;
} // namespace

template <typename S, typename T>
void SearchSortedCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
right_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "right");
sequence_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
values_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -76,16 +77,8 @@ bool SearchSortedCPUKernel<S, T>::Launch(const std::vector<kernel::AddressPtr> &
template <typename S, typename T>
void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
// inputs: sequence, values
if (inputs.size() != kInputSize) {
MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but SearchSorted needs" << kInputSize << " inputs.";
}

// outputs: positions
if (outputs.size() != kOutputSize) {
MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SearchSorted needs " << kOutputSize
<< " outputs";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSearchSortedInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSearchSortedOutputsNum, kernel_name_);

if (outputs[0]->size / sizeof(T) != inputs[1]->size / sizeof(S)) {
MS_LOG(EXCEPTION) << "The output dimensions " << outputs[0]->size << " must match the dimensions of input values "


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h View File

@@ -39,10 +39,10 @@ class SearchSortedCPUKernel : public CPUKernel {
void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

bool right_{false};
size_t search_len{0};
std::vector<size_t> sequence_shape_;
std::vector<size_t> values_shape_;
std::vector<size_t> output_shape_;
size_t search_len{0};
};

MS_REG_CPU_KERNEL_T_S(


+ 12
- 13
mindspore/ccsrc/backend/kernel_compiler/cpu/select_cpu_kernel.cc View File

@@ -19,31 +19,30 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSelectInputsNum = 3;
constexpr size_t kSelectOutputsNum = 1;
} // namespace

template <typename T>
void SelectCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SelectCpuKernel needs 3 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SelectCpuKernel needs 1 output.";
}
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (size_t x : shape) {
element_num_ *= x;
}
return;
}

template <typename T>
bool SelectCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
auto input_cond = reinterpret_cast<bool *>(inputs[0]->addr);
auto input_x = reinterpret_cast<T *>(inputs[1]->addr);
auto input_y = reinterpret_cast<T *>(inputs[2]->addr);
auto output = reinterpret_cast<T *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSelectInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSelectOutputsNum, kernel_name_);
auto *input_cond = reinterpret_cast<bool *>(inputs[0]->addr);
auto *input_x = reinterpret_cast<T *>(inputs[1]->addr);
auto *input_y = reinterpret_cast<T *>(inputs[2]->addr);
auto *output = reinterpret_cast<T *>(outputs[0]->addr);
for (size_t pos = 0; pos < element_num_; pos++) {
output[pos] = input_cond[pos] ? input_x[pos] : input_y[pos];
}


+ 5
- 18
mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc View File

@@ -15,42 +15,29 @@
*/

#include "backend/kernel_compiler/cpu/sgd_cpu_kernel.h"

#include <thread>
#include <vector>

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kInputSize = 6;
constexpr size_t kOutputSize = 1;
constexpr size_t kSGDInputsNum = 6;
constexpr size_t kSGDOutputsNum = 1;
} // namespace
template <typename T>
void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dampening_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "dampening");
weight_decay_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "weight_decay");
nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "nesterov");
}

template <typename T>
void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
// inputs: param, grad, lr, accum, momentum, stat
if (inputs.size() != kInputSize) {
MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs.";
}

// output: output_param
if (outputs.size() != kOutputSize) {
MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs.";
}
}

template <typename T>
bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
CheckParam(inputs, outputs);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSGDInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSGDOutputsNum, kernel_name_);
auto param = reinterpret_cast<T *>(inputs[PARAM]->addr);
auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
auto lr = reinterpret_cast<T *>(inputs[LR]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_

@@ -35,7 +36,6 @@ class SGDCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
float dampening_{0.0};
float weight_decay_{0.0};
bool nesterov_{true};


+ 14
- 18
mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_cpu_kernel.cc View File

@@ -19,9 +19,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSigmoidCrossEntropyWithLogitsInputsNum = 2;
constexpr size_t kSigmoidCrossEntropyWithLogitsOutputsNum = 1;
} // namespace

void SigmoidCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const uint64_t &d : x_shape) {
@@ -45,12 +50,14 @@ bool SigmoidCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::Ad
template <typename T>
void SigmoidCrossEntropyWithLogitsCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto logits_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto labels_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
T zero = (T)0.0;
T one = (T)1.0;
T two = (T)2.0;
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSigmoidCrossEntropyWithLogitsInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSigmoidCrossEntropyWithLogitsOutputsNum, kernel_name_);
auto *logits_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *labels_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto zero = static_cast<T>(0.0);
auto one = static_cast<T>(1.0);
auto two = static_cast<T>(2.0);
for (uint64_t i = 0; i < tensor_size_; ++i) {
if (logits_addr[i] >= zero) {
output_addr[i] = static_cast<T>(log1p(static_cast<float>(exp(logits_addr[i] - two * logits_addr[i])))) -
@@ -60,16 +67,5 @@ void SigmoidCrossEntropyWithLogitsCPUKernel::LaunchKernel(const std::vector<Addr
}
}
}

void SigmoidCrossEntropyWithLogitsCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel needs 2 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel expects 1 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 3
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,11 +35,10 @@ class SigmoidCrossEntropyWithLogitsCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
uint64_t tensor_size_{1};
};


+ 15
- 19
mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,9 +19,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSigmoidCrossEntropyWithLogitsGradInputsNum = 3;
constexpr size_t kSigmoidCrossEntropyWithLogitsGradOutputsNum = 1;
} // namespace

void SigmoidCrossEntropyWithLogitsGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const uint64_t &d : x_shape) {
@@ -32,6 +37,8 @@ void SigmoidCrossEntropyWithLogitsGradCPUKernel::InitKernel(const CNodePtr &kern
bool SigmoidCrossEntropyWithLogitsGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSigmoidCrossEntropyWithLogitsGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSigmoidCrossEntropyWithLogitsGradOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) {
@@ -45,12 +52,12 @@ bool SigmoidCrossEntropyWithLogitsGradCPUKernel::Launch(const std::vector<kernel
template <typename T>
void SigmoidCrossEntropyWithLogitsGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
auto logits_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto labels_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto dloss_addr = reinterpret_cast<T *>(inputs[2]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
T zero = (T)0.0;
T one = (T)1.0;
auto *logits_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *labels_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto *dloss_addr = reinterpret_cast<T *>(inputs[2]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto zero = static_cast<T>(0.0);
auto one = static_cast<T>(1.0);
for (uint64_t i = 0; i < tensor_size_; ++i) {
if (logits_addr[i] >= zero) {
output_addr[i] = (one / (one + static_cast<T>(exp(-logits_addr[i]))) - labels_addr[i]) * dloss_addr[i];
@@ -60,16 +67,5 @@ void SigmoidCrossEntropyWithLogitsGradCPUKernel::LaunchKernel(const std::vector<
}
}
}

void SigmoidCrossEntropyWithLogitsGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel needs 2 inputs, but gets " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel expects 1 output, but gets" << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 3
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_CPU_KERNEL_H_

#include <memory>
#include <unordered_map>
#include <vector>
@@ -34,11 +35,10 @@ class SigmoidCrossEntropyWithLogitsGradCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
uint64_t tensor_size_{1};
};


+ 16
- 12
mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,15 +15,18 @@
*/

#include "backend/kernel_compiler/cpu/slice_cpu_kernel.h"

#include <algorithm>
#include <unordered_map>

#include "common/thread_pool.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSliceInputsNum = 1;
constexpr size_t kSliceOutputsNum = 1;
} // namespace

int NormalizeBeginPos(int begin_pos, int dim_len) {
if (begin_pos < 0) {
int normal_pos = begin_pos + dim_len;
@@ -34,6 +37,7 @@ int NormalizeBeginPos(int begin_pos, int dim_len) {

void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
static const std::unordered_map<TypeId, int> type_size_map = {{kNumberTypeBool, sizeof(bool)},
{kNumberTypeInt32, sizeof(int)},
{kNumberTypeFloat32, sizeof(float)},
@@ -84,29 +88,29 @@ void SliceCPUKernel::InitSliceParam(const std::vector<size_t> &input_shape, cons
slice_param_.param_length_ = DIMENSION_8D;
}

void SliceSimpleDim2(const int8_t *input, int8_t *output, SliceParameter *param, int data_size, size_t row_size) {
size_t copy_size = data_size * param->size_[1];
void SliceSimpleDim2(const int8_t *input, int8_t *output, const SliceParameter *param, int data_size, size_t row_size) {
size_t copy_size = IntToSize(data_size * param->size_[1]);
for (size_t i = 0; i < row_size; ++i) {
auto dst = output + data_size * param->size_[1] * i;
auto src = input + data_size * (param->shape_[1] * i + param->begin_[1]);
(void)memcpy_s(dst, copy_size, src, copy_size);
auto ret = memcpy_s(dst, copy_size, src, copy_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Memcpy failed.";
}
}
}

bool SliceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 1 || outputs.size() != 1) {
MS_LOG(ERROR) << "Slice requires 1 input and 1 output, but got " << inputs.size() << " input and " << outputs.size()
<< " output.";
return false;
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSliceInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSliceOutputsNum, kernel_name_);
if (outputs[0]->size == 0) {
MS_LOG(WARNING) << "Slice output memory size should be greater than 0, but got 0.";
return true;
}

auto input_addr = inputs[0]->addr;
auto output_addr = outputs[0]->addr;

if (origin_dim_size_ == 2) {
auto task = [this, &input_addr, &output_addr](size_t start, size_t end) {
auto src =


+ 1
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@

#include <vector>
#include <memory>

#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "nnacl/base/slice_base.h"
@@ -39,7 +38,6 @@ class SliceCPUKernel : public CPUKernel {
private:
void InitSliceParam(const std::vector<size_t> &input_shape, const std::vector<int64_t> &begin,
const std::vector<int64_t> &size);

size_t origin_dim_size_{0};
int data_size_{4};
SliceParameter slice_param_;


+ 35
- 30
mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc View File

@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h"
#include <algorithm>
#include "runtime/device/cpu/cpu_device_address.h"
@@ -20,11 +21,22 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSliceGradInputsNum = 2;
constexpr size_t kStridedSliceGradInputsNum = 1;
constexpr size_t kOutputsNum = 1;
} // namespace

void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.empty() || input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but SliceGradGpuKernel only support 1-4D.";
}

std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN);
(void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_),
[](const int64_t &value) { return LongToInt(value); });
@@ -51,6 +63,7 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
}
FormatArgs(false);
}

ExpandAllMemberDims();
CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_);
CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_);
@@ -60,10 +73,10 @@ void SliceGradCPUKernel::ExpandAllMemberDims() {
auto output_len = output_shape_.size();
if (output_len < 4) {
for (size_t i = 0; i < 4 - output_len; ++i) {
output_shape_.insert(output_shape_.begin(), 1);
begin_.insert(begin_.begin(), 0);
strides_.insert(strides_.begin(), 1);
end_.insert(end_.begin(), 1);
(void)output_shape_.insert(output_shape_.begin(), 1);
(void)begin_.insert(begin_.begin(), 0);
(void)strides_.insert(strides_.begin(), 1);
(void)end_.insert(end_.begin(), 1);
}
}
for (size_t i = 0; i < 4; ++i) {
@@ -79,7 +92,12 @@ void SliceGradCPUKernel::ExpandAllMemberDims() {

bool SliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
bool ret{true};
size_t expect_inputs_num =
kernel_name_ == prim::kPrimSliceGrad->name() ? kSliceGradInputsNum : kStridedSliceGradInputsNum;
CHECK_KERNEL_INPUTS_NUM(inputs.size(), expect_inputs_num, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);

bool ret = true;
if (dtype_ == kNumberTypeInt32) {
ret = LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
@@ -96,9 +114,9 @@ bool SliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, c

template <typename T>
bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
T *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
T *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
const std::vector<kernel::AddressPtr> &outputs) const {
auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);

auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size);
if (ret != EOK) {
@@ -113,16 +131,17 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp
size_t out_step_size[3] = {IntToSize(strides_[0]) * output_element_num_[0],
IntToSize(strides_[1]) * output_element_num_[1],
IntToSize(strides_[2]) * output_element_num_[2]};
auto in_n_offset = 0;
auto out_n_offset = out_start_offset[0];
size_t in_n_offset = 0;
size_t out_n_offset = out_start_offset[0];
size_t input_index = 0;
for (int i = begin_[0]; stride_signs[0] * i < stride_signs[0] * end_[0];
i += strides_[0], in_n_offset += input_element_num_[0], out_n_offset += out_step_size[0]) {
if (can_copy_memory[0]) {
CopyDataToOutput<T>(inputs, in_n_offset, outputs, out_n_offset, input_element_num_[0], 0);
continue;
}
auto in_c_offset = 0;
auto out_c_offset = out_start_offset[1];
size_t in_c_offset = 0;
size_t out_c_offset = out_start_offset[1];
for (int j = begin_[1]; stride_signs[1] * j < stride_signs[1] * end_[1];
j += strides_[1], in_c_offset += input_element_num_[1], out_c_offset += out_step_size[1]) {
if (can_copy_memory[1]) {
@@ -130,8 +149,8 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp
input_element_num_[1], 1);
continue;
}
auto in_h_offset = 0;
auto out_h_offset = out_start_offset[2];
size_t in_h_offset = 0;
size_t out_h_offset = out_start_offset[2];
for (int k = begin_[2]; stride_signs[2] * k < stride_signs[2] * end_[2];
k += strides_[2], in_h_offset += input_element_num_[2], out_h_offset += out_step_size[2]) {
if (can_copy_memory[2]) {
@@ -140,7 +159,7 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp
continue;
}
for (int m = begin_[3]; stride_signs[3] * m < stride_signs[3] * end_[3]; m += strides_[3]) {
output_addr[out_n_offset + out_c_offset + out_h_offset + IntToSize(m)] = *input_addr++;
output_addr[out_n_offset + out_c_offset + out_h_offset + IntToSize(m)] = input_addr[input_index++];
}
}
}
@@ -223,19 +242,5 @@ void SliceGradCPUKernel::FormatArgs(bool stride) {
}
}
}

void SliceGradCPUKernel::CheckParam(const CNodePtr &kernel_node) const {
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SliceGradGpuKernel needs 1 output.";
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but SliceGradGpuKernel only support 4d or lower.";
}
if (input_shape.size() == 0) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", scalar is not supported.";
}
}
} // namespace kernel
} // namespace mindspore

+ 6
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_GRAD_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
@@ -34,16 +36,16 @@ class SliceGradCPUKernel : public CPUKernel {

private:
template <typename T>
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) const;
template <typename T>
void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t in_offset,
const std::vector<kernel::AddressPtr> &outputs, size_t out_offset, size_t copy_num,
int id) const;

void ExpandAllMemberDims();
bool CanCopyMemoryOnAxis(size_t dim) const;
int SignOfStride(size_t axis) const;

void CheckParam(const CNodePtr &kernel_node) const;
void FormatArgs(bool stride);
std::vector<int> begin_;
std::vector<int> end_;


+ 14
- 19
mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_cpu_kernel.cc View File

@@ -19,11 +19,19 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSmoothL1LossInputsNum = 2;
constexpr size_t kSmoothL1LossOutputsNum = 1;
} // namespace

template <typename T>
void SmoothL1LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
beta_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "beta");
CheckParam(kernel_node);
if (beta_ == 0.0) {
MS_LOG(EXCEPTION) << "Attr beta can not be zero.";
}
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const uint64_t &d : x_shape) {
tensor_size_ *= d;
@@ -34,9 +42,11 @@ template <typename T>
bool SmoothL1LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto predict_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto target_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto result_addr = reinterpret_cast<T *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSmoothL1LossInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSmoothL1LossOutputsNum, kernel_name_);
const auto *predict_addr = reinterpret_cast<T *>(inputs[0]->addr);
const auto *target_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto *result_addr = reinterpret_cast<T *>(outputs[0]->addr);
T zero = (T)0.0;
T half = (T)0.5;
T beta = (T)beta_;
@@ -56,20 +66,5 @@ bool SmoothL1LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp
CPUKernelUtils::ParallelFor(task, tensor_size_);
return true;
}

template <typename T>
void SmoothL1LossCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SmoothL1LossCPUKernel needs 2 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SmoothL1LossCPUKernel needs 1 output.";
}
if (beta_ == 0.0) {
MS_LOG(EXCEPTION) << "Attr beta can not be zero.";
}
}
} // namespace kernel
} // namespace mindspore

+ 2
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_cpu_kernel.h View File

@@ -37,10 +37,9 @@ class SmoothL1LossCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
float beta_ = 1.0;
float beta_{1.0};
TypeId dtype_{kTypeUnknown};
uint64_t tensor_size_ = 1;
uint64_t tensor_size_{1};
};

MS_REG_CPU_KERNEL_T(


+ 15
- 20
mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_grad_cpu_kernel.cc View File

@@ -19,11 +19,19 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSmoothL1LossGradInputsNum = 3;
constexpr size_t kSmoothL1LossGradOutputsNum = 1;
} // namespace

template <typename T>
void SmoothL1LossGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
beta_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "beta");
CheckParam(kernel_node);
if (beta_ == 0.0) {
MS_LOG(EXCEPTION) << "Attr beta can not be zero.";
}
std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
for (const uint64_t &d : x_shape) {
tensor_size_ *= d;
@@ -34,10 +42,12 @@ template <typename T>
bool SmoothL1LossGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto predict_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto target_addr = reinterpret_cast<T *>(inputs[1]->addr);
auto dloss_addr = reinterpret_cast<T *>(inputs[2]->addr);
auto result_addr = reinterpret_cast<T *>(outputs[0]->addr);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSmoothL1LossGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSmoothL1LossGradOutputsNum, kernel_name_);
const auto *predict_addr = reinterpret_cast<T *>(inputs[0]->addr);
const auto *target_addr = reinterpret_cast<T *>(inputs[1]->addr);
const auto *dloss_addr = reinterpret_cast<T *>(inputs[2]->addr);
auto *result_addr = reinterpret_cast<T *>(outputs[0]->addr);
T beta = (T)beta_;
for (uint64_t i = 0; i < tensor_size_; ++i) {
T diff = predict_addr[i] - target_addr[i];
@@ -51,20 +61,5 @@ bool SmoothL1LossGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr>
}
return true;
}

template <typename T>
void SmoothL1LossGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SmoothL1LossGradCPUKernel needs 3 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SmoothL1LossGradCPUKernel needs 1 output.";
}
if (beta_ == 0.0) {
MS_LOG(EXCEPTION) << "Attr beta can not be zero.";
}
}
} // namespace kernel
} // namespace mindspore

+ 0
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/smooth_l1_loss_grad_cpu_kernel.h View File

@@ -37,7 +37,6 @@ class SmoothL1LossGradCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
float beta_{1.0};
uint64_t tensor_size_{1};
};


+ 15
- 15
mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.cc View File

@@ -15,27 +15,39 @@
*/

#include "backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.h"

#include <vector>

#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSpaceToDepthInputsNum = 1;
constexpr size_t kSpaceToDepthOutputsNum = 1;
constexpr size_t kSpaceToDepthInputShapeSize = 4;
constexpr size_t kSpaceToDepthMinBlockSize = 2;
} // namespace
template <typename T>
void SpaceToDepthCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);

input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
block_size_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size"));
if (input_shape_.size() != kSpaceToDepthInputShapeSize) {
MS_LOG(EXCEPTION) << "Input shape must be a 4-D tensor, but got " << input_shape_.size() << "-D";
}
if (block_size_ < kSpaceToDepthMinBlockSize) {
MS_LOG(EXCEPTION) << "The block size must be >= " << kSpaceToDepthMinBlockSize << ", but got " << block_size_;
}
}

template <typename T>
bool SpaceToDepthCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSpaceToDepthInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSpaceToDepthOutputsNum, kernel_name_);
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
size_t size = inputs[0]->size / sizeof(T);
@@ -75,17 +87,5 @@ bool SpaceToDepthCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp
CPUKernelUtils::ParallelFor(task, size);
return true;
}

template <typename T>
void SpaceToDepthCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but DepthToSpaceCPUKerrnel needs 1 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but DepthToSpaceCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 3
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.h View File

@@ -13,11 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPACETODEPTH_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPACETODEPTH_CPU_KERNEL_H_

#include <string>
#include <vector>

#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
@@ -33,10 +34,9 @@ class SpaceToDepthCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
size_t block_size_{0};
std::vector<size_t> input_shape_;
std::vector<size_t> output_shape_;
size_t block_size_{0};
};

MS_REG_CPU_KERNEL_T(


+ 15
- 15
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.cc View File

@@ -21,7 +21,8 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseApplyAdamInputSize = 11;
constexpr size_t kSparseApplyAdamInputsNum = 11;
constexpr size_t kSparseApplyAdamWorkspaceSize = 5;

template <typename T>
void ComputeAdam(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
@@ -100,6 +101,7 @@ void SparseApplyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node)

void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
std::vector<size_t> v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
@@ -140,9 +142,9 @@ void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const {
auto var = reinterpret_cast<float *>(inputs[0]->addr);
auto m = reinterpret_cast<float *>(inputs[1]->addr);
auto v = reinterpret_cast<float *>(inputs[2]->addr);
auto *var = reinterpret_cast<float *>(inputs[0]->addr);
auto *m = reinterpret_cast<float *>(inputs[1]->addr);
auto *v = reinterpret_cast<float *>(inputs[2]->addr);
auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0];
if (beta1_power == 1) {
MS_LOG(EXCEPTION) << "The beta1_power should not be 1";
@@ -152,13 +154,13 @@ void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0];
auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0];
auto epsilon = reinterpret_cast<float *>(inputs[8]->addr)[0];
auto grad = reinterpret_cast<float *>(inputs[9]->addr);
auto indices = reinterpret_cast<T *>(inputs[10]->addr);
auto new_grad = reinterpret_cast<float *>(workspace[0]->addr);
auto new_indices = reinterpret_cast<T *>(workspace[1]->addr);
auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
auto m_t = reinterpret_cast<float *>(workspace[4]->addr);
auto *grad = reinterpret_cast<float *>(inputs[9]->addr);
auto *indices = reinterpret_cast<T *>(inputs[10]->addr);
auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr);
auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr);
auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
auto *m_t = reinterpret_cast<float *>(workspace[4]->addr);

SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_});
SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_});
@@ -180,7 +182,6 @@ void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
input_params.beta1_ = beta1;
input_params.beta2_ = beta2;
MultiThreadCompute<T>(ComputeMomentum<T>, &input_params, total_dim_size);

input_params.m_t_ = m_t;
input_params.use_nesterov_ = use_nesterov_;
input_params.sparse_grad_ = unique_sparse_grad;
@@ -200,9 +201,8 @@ void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &) {
if (inputs.size() < kSparseApplyAdamInputSize) {
MS_LOG(EXCEPTION) << "Error input size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyAdamInputsNum, kernel_name_);
CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyAdamWorkspaceSize, kernel_name_);
if (indices_data_type_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, workspace);
} else if (indices_data_type_ == kNumberTypeInt64) {


+ 10
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_

@@ -27,17 +28,21 @@ class SparseApplyAdamCPUKernel : public SparseOptimizerCPUKernel {
~SparseApplyAdamCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
bool use_nesterov_{false};

private:
void InitInputOutputSize(const CNodePtr &kernel_node) override;

template <typename T>
void InitWorkspaceSize();

template <typename T>
void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const;

protected:
bool use_nesterov_{false};
};

MS_REG_CPU_KERNEL(FusedSparseAdam,


+ 17
- 15
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.cc View File

@@ -21,7 +21,9 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseApplyFtrlInputSize = 5;
constexpr size_t kSparseApplyFtrlInputsNum = 5;
constexpr size_t kSparseApplyFtrlWorkspaceSize = 4;

template <typename T>
void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
MS_EXCEPTION_IF_NULL(input_params);
@@ -74,8 +76,10 @@ void SparseApplyFtrlCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node)
CPUKernel::InitInputOutputSize(kernel_node);
if (indices_data_type_ == kNumberTypeInt32) {
InitWorkspaceSize<int>();
} else {
} else if (indices_data_type_ == kNumberTypeInt64) {
InitWorkspaceSize<int64_t>();
} else {
MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported";
}
}

@@ -135,15 +139,15 @@ void SparseApplyFtrlCPUKernel::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
void SparseApplyFtrlCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const {
auto var = reinterpret_cast<float *>(inputs[0]->addr);
auto accum = reinterpret_cast<float *>(inputs[1]->addr);
auto linear = reinterpret_cast<float *>(inputs[2]->addr);
auto grad = reinterpret_cast<float *>(inputs[3]->addr);
auto indices = reinterpret_cast<T *>(inputs[4]->addr);
auto new_grad = reinterpret_cast<float *>(workspace[0]->addr);
auto new_indices = reinterpret_cast<T *>(workspace[1]->addr);
auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
auto *var = reinterpret_cast<float *>(inputs[0]->addr);
auto *accum = reinterpret_cast<float *>(inputs[1]->addr);
auto *linear = reinterpret_cast<float *>(inputs[2]->addr);
auto *grad = reinterpret_cast<float *>(inputs[3]->addr);
auto *indices = reinterpret_cast<T *>(inputs[4]->addr);
auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr);
auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr);
auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);

SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_});
SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_});
@@ -173,10 +177,8 @@ void SparseApplyFtrlCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr
bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &) {
if (inputs.size() < kSparseApplyFtrlInputSize) {
MS_LOG(EXCEPTION) << "error input output size!";
}

CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyFtrlInputsNum, kernel_name_);
CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyFtrlWorkspaceSize, kernel_name_);
if (indices_data_type_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, workspace);
} else if (indices_data_type_ == kNumberTypeInt64) {


+ 13
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_

@@ -27,20 +28,24 @@ class SparseApplyFtrlCPUKernel : public SparseOptimizerCPUKernel {
~SparseApplyFtrlCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
float lr_{0.0};
float l1_{0.0};
float l2_{0.0};
float lr_power_{0.0};

private:
void InitInputOutputSize(const CNodePtr &kernel_node) override;

template <typename T>
void InitWorkspaceSize();

template <typename T>
void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const;

protected:
float lr_{0};
float l1_{0};
float l2_{0};
float lr_power_{0};
};

MS_REG_CPU_KERNEL(FusedSparseFtrl,


+ 19
- 17
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.cc View File

@@ -21,7 +21,8 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseApplyLazyAdamInputSize = 11;
constexpr size_t kSparseApplyLazyAdamInputsNum = 11;
constexpr size_t kSparseApplyLazyAdamWorkspaceSize = 4;

template <typename T>
void ComputeLazyAdam(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
@@ -70,13 +71,16 @@ void SparseApplyLazyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_no
CPUKernel::InitInputOutputSize(kernel_node);
if (indices_data_type_ == kNumberTypeInt32) {
InitWorkspaceSize<int>();
} else {
} else if (indices_data_type_ == kNumberTypeInt64) {
InitWorkspaceSize<int64_t>();
} else {
MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported";
}
}

void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
std::vector<size_t> v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
@@ -103,14 +107,14 @@ void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
var_outer_dim_size_ *= var_shape[i];
}
if (indices_shape.size() != 1) {
MS_LOG(EXCEPTION) << "Indices must be 1D!";
MS_LOG(EXCEPTION) << "Indices must be 1D";
}
indices_size_ = indices_shape[0];
if (grad_shape[0] != indices_size_) {
MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
}
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, USE_NESTEROV);
}
indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 10);
}
@@ -118,9 +122,9 @@ void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
void SparseApplyLazyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const {
auto var = reinterpret_cast<float *>(inputs[0]->addr);
auto m = reinterpret_cast<float *>(inputs[1]->addr);
auto v = reinterpret_cast<float *>(inputs[2]->addr);
auto *var = reinterpret_cast<float *>(inputs[0]->addr);
auto *m = reinterpret_cast<float *>(inputs[1]->addr);
auto *v = reinterpret_cast<float *>(inputs[2]->addr);
auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0];
if (beta1_power == 1) {
MS_LOG(EXCEPTION) << "The beta1_power should not be 1";
@@ -130,12 +134,12 @@ void SparseApplyLazyAdamCPUKernel::LaunchKernel(const std::vector<kernel::Addres
auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0];
auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0];
auto epsilon = reinterpret_cast<float *>(inputs[8]->addr)[0];
auto grad = reinterpret_cast<float *>(inputs[9]->addr);
auto indices = reinterpret_cast<T *>(inputs[10]->addr);
auto new_grad = reinterpret_cast<float *>(workspace[0]->addr);
auto new_indices = reinterpret_cast<T *>(workspace[1]->addr);
auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
auto *grad = reinterpret_cast<float *>(inputs[9]->addr);
auto *indices = reinterpret_cast<T *>(inputs[10]->addr);
auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr);
auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr);
auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);

SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_});
SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_});
@@ -167,10 +171,8 @@ void SparseApplyLazyAdamCPUKernel::LaunchKernel(const std::vector<kernel::Addres
bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &) {
if (inputs.size() < kSparseApplyLazyAdamInputSize) {
MS_LOG(EXCEPTION) << "Error input size!";
}

CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyLazyAdamInputsNum, kernel_name_);
CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyLazyAdamWorkspaceSize, kernel_name_);
if (indices_data_type_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, workspace);
} else if (indices_data_type_ == kNumberTypeInt64) {


+ 9
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_lazy_adam_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_

@@ -27,17 +28,20 @@ class SparseApplyLazyAdamCPUKernel : public SparseOptimizerCPUKernel {
~SparseApplyLazyAdamCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool use_nesterov_{false};

private:
template <typename T>
void InitWorkspaceSize();

template <typename T>
void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const;

protected:
bool use_nesterov_{false};
};

MS_REG_CPU_KERNEL(FusedSparseLazyAdam,


+ 8
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc View File

@@ -21,7 +21,8 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseApplyProximalAdagradInputSize = 7;
constexpr size_t kSparseApplyProximalAdagradInputsNum = 7;
constexpr size_t kSparseApplyProximalAdagradWorkspaceSize = 4;

template <typename T>
void ComputeProximalAdagrad(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
@@ -70,13 +71,16 @@ void SparseApplyProximalAdagradCPUKernel::InitInputOutputSize(const CNodePtr &ke
CPUKernel::InitInputOutputSize(kernel_node);
if (indices_data_type_ == kNumberTypeInt32) {
InitWorkspaceSize<int>();
} else {
} else if (indices_data_type_ == kNumberTypeInt64) {
InitWorkspaceSize<int64_t>();
} else {
MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported";
}
}

void SparseApplyProximalAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
std::vector<size_t> accum_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
std::vector<size_t> lr_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
@@ -160,9 +164,8 @@ void SparseApplyProximalAdagradCPUKernel::LaunchKernel(const std::vector<kernel:
bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &) {
if (inputs.size() < kSparseApplyProximalAdagradInputSize) {
MS_LOG(EXCEPTION) << "Wrong input size!";
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyProximalAdagradInputsNum, kernel_name_);
CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyProximalAdagradWorkspaceSize, kernel_name_);
if (indices_data_type_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, workspace);
} else if (indices_data_type_ == kNumberTypeInt64) {


+ 8
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_

@@ -27,11 +28,16 @@ class SparseApplyProximalAdagradCPUKernel : public SparseOptimizerCPUKernel {
~SparseApplyProximalAdagradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;

private:
template <typename T>
void InitWorkspaceSize();

template <typename T>
void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace) const;


+ 4
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_OPTIMIZER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_OPTIMIZER_CPU_KERNEL_H_

@@ -63,6 +64,7 @@ struct MultiThreadComputeParams {
size_t var_outer_dim_size_{0};
bool use_nesterov_;
};

template <typename T>
using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams<T> *param, size_t start, size_t end)>;

@@ -205,7 +207,7 @@ class SparseOptimizerCPUKernel : public CPUKernel {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(segment);
MS_EXCEPTION_IF_NULL(segment->indices_);
if (param.thread_num_ < 1) {
if (param.thread_num_ == 0) {
MS_EXCEPTION(ArgumentError) << "Input param thread num must > 0!";
}
std::vector<size_t> bucket_data_num(param.thread_num_, 0);


+ 16
- 10
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc View File

@@ -20,12 +20,18 @@
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseTensorDenseMatmulInputsNum = 4;
constexpr size_t kSparseTensorDenseMatmulOutputsNum = 1;
constexpr size_t kSparseTensorDenseMatmulOutputShapeSize = 2;
constexpr size_t kSparseTensorDenseMatmulDenseShapeSize = 2;
constexpr size_t kIndicesSizeNum = 2;
constexpr size_t kIndices2rdDimNum = 2;
} // namespace

template <typename I, typename T>
void SparseTensorDenseMatmulCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
adj_st_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST);
adj_dt_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT);
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES);
@@ -59,11 +65,8 @@ template <typename I, typename T>
bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != kInputNum || outputs.size() != kOutputNum) {
MS_LOG(ERROR) << "SparseTensorDenseMatmul requires 4 inputs and 1 output, but got " << inputs.size()
<< " inputs and " << outputs.size() << " output.";
return false;
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseTensorDenseMatmulInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseTensorDenseMatmulOutputsNum, kernel_name_);
if (outputs[0]->size == 0) {
MS_LOG(WARNING) << "SparseTensorDenseMatmul output memory size should be greater than 0, but got 0.";
return true;
@@ -72,13 +75,16 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
MS_LOG(EXCEPTION) << "SparseTensorDenseMatmul memset output failed!";
}

const size_t b_index = 3;
const auto *a_indices = reinterpret_cast<I *>(inputs[0]->addr);
const auto *a_values = reinterpret_cast<T *>(inputs[1]->addr);
const auto *b = reinterpret_cast<T *>(inputs[3]->addr);
const auto *b = reinterpret_cast<T *>(inputs[b_index]->addr);
auto *out = reinterpret_cast<T *>(outputs[0]->addr);
const size_t indices_length = inputs[0]->size / sizeof(I);
const size_t values_length = inputs[1]->size / sizeof(T);
const size_t b_length = inputs[3]->size / sizeof(T);
const size_t b_length = inputs[b_index]->size / sizeof(T);

const size_t dim_num = 2;
const size_t out_dim_0 = output_shape_[0];
const size_t out_dim_1 = output_shape_[1];
const size_t b_dim_0 = b_shape_[0];
@@ -86,14 +92,14 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
const size_t same_dim = adj_dt_ ? b_dim_1 : b_dim_0;

for (size_t i = 0; i < values_size_; ++i) {
if (i * 2 + 1 >= indices_length) { // the interval is 2
if (i * dim_num + 1 >= indices_length) {
MS_LOG(EXCEPTION) << "The index of a_indices out of bounds.";
}
if (i >= values_length) {
MS_LOG(EXCEPTION) << "The index of a_values out of bounds.";
}
const int row = adj_st_ ? a_indices[i * 2 + 1] : a_indices[i * 2];
const int col = adj_st_ ? a_indices[i * 2] : a_indices[i * 2 + 1];
const int row = adj_st_ ? a_indices[i * dim_num + 1] : a_indices[i * dim_num];
const int col = adj_st_ ? a_indices[i * dim_num] : a_indices[i * dim_num + 1];
if (row >= SizeToInt(out_dim_0) || row < 0 || col >= SizeToInt(same_dim) || col < 0) {
MS_EXCEPTION(ValueError) << "The indices including out of bounds index, row range: [0, " << out_dim_0
<< "), col range: [0, " << same_dim << "), but got row: " << row << ", col: " << col;


+ 0
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.h View File

@@ -23,10 +23,6 @@

namespace mindspore {
namespace kernel {
constexpr size_t kInputNum = 4;
constexpr size_t kOutputNum = 1;
constexpr size_t kIndicesSizeNum = 2;
constexpr size_t kIndices2rdDimNum = 2;
template <typename I, typename T>
class SparseTensorDenseMatmulCPUKernel : public CPUKernel {
public:


+ 5
- 18
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.cc View File

@@ -22,12 +22,14 @@ namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kIndicesShapeSize = 2;
constexpr size_t kSparseToDenseInputsNum = 3;
constexpr size_t kSparseToDenseOutputsNum = 1;
} // namespace

template <typename I, typename T>
void SparseToDenseCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (indices_shape.size() != kIndicesShapeSize) {
MS_LOG(EXCEPTION) << "SparseToDense requires 'indices' should be a " << kIndicesShapeSize << "-D Tensor, but got "
@@ -48,11 +50,8 @@ template <typename I, typename T>
bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 3 || outputs.size() != 1) {
MS_LOG(ERROR) << "SparseToDense requires 3 inputs and 1 output, but got " << inputs.size() << " inputs and "
<< outputs.size() << " output.";
return false;
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseToDenseInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseToDenseOutputsNum, kernel_name_);
if (outputs[0]->size == 0) {
MS_LOG(WARNING) << "SparseToDense output memory size should be greater than 0, but got 0.";
return true;
@@ -92,17 +91,5 @@ bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr>
}
return true;
}

template <typename I, typename T>
void SparseToDenseCPUKernel<I, T>::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "SparseToDense needs 3 inputs, but got " << input_num;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "SparseToDense should have 2 outputs, but got " << output_num;
}
}
} // namespace kernel
} // namespace mindspore

+ 0
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_to_dense_cpu_kernal.h View File

@@ -37,7 +37,6 @@ class SparseToDenseCPUKernel : public CPUKernel {
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> output_shape_;
size_t values_size_{0};
};


+ 13
- 15
mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc View File

@@ -21,11 +21,16 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSplitInputsNum = 1;
} // namespace

template <typename T>
void SplitCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis");
output_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "output_num");
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "output_num"));
if (output_num_ == 0) {
MS_LOG(EXCEPTION) << "Attr output_num is equal to 0";
}
@@ -49,6 +54,8 @@ template <typename T>
bool SplitCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSplitInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), output_num_, kernel_name_);
LaunchKernel(inputs, workspace, outputs);
return true;
}
@@ -56,7 +63,7 @@ bool SplitCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
template <typename T>
void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t /* size */) {
SplitParameter param;
param.num_split_ = LongToInt(output_num_);
param.num_split_ = SizeToInt(output_num_);
param.split_dim_ = LongToInt(axis_);
param.strides_[input_shape_.size() - 1] = 1;
for (int i = SizeToInt(input_shape_.size()) - 2; i >= 0; i--) { // from -2 to 0 dim
@@ -64,7 +71,7 @@ void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t /* size */) {
}
auto split_sizes = std::make_unique<int[]>(IntToSize(param.num_split_));
param.split_sizes_ = split_sizes.get();
int split_size = input_shape_[param.split_dim_] / output_num_;
int split_size = input_shape_[param.split_dim_] / SizeToInt(output_num_);
for (int i = 0; i < param.num_split_; i++) {
param.split_sizes_[i] = split_size;
}
@@ -96,13 +103,7 @@ void SplitCPUKernel<T>::LaunchKernel(const std::vector<AddressPtr> &inputs,

template <typename T>
void SplitCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
int64_t dims = SizeToLong(input_shape_.size());
int64_t output_num = SizeToLong(AnfAlgo::GetOutputTensorNum(kernel_node));

if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Split needs 1 input.";
}
if (dims == 0 || dims > SPLIT_STRIDES_SIZE) {
MS_LOG(EXCEPTION) << "Input dims is " << dims << ", scalar is not supported.";
}
@@ -110,14 +111,11 @@ void SplitCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Attr axis_ " << axis_ << " must be in " << -dims << "~" << dims;
}
if (axis_ < 0) {
axis_ += SizeToInt(input_shape_.size());
axis_ += SizeToLong(input_shape_.size());
}
if (output_num_ > IntToLong(input_shape_[LongToUlong(axis_)])) {
if (output_num_ > IntToSize(input_shape_[LongToUlong(axis_)])) {
MS_LOG(EXCEPTION) << "Attr output_num " << output_num_ << " must less than " << input_shape_[axis_];
}
if (output_num_ != output_num) {
MS_LOG(EXCEPTION) << "Output num is " << output_num << ", but need " << output_num_;
}
}
} // namespace kernel
} // namespace mindspore

+ 7
- 13
mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.h View File

@@ -37,25 +37,19 @@ class SplitCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

void InitInputOutputSize(const CNodePtr &kernel_node) override;

private:
void CheckParam(const CNodePtr &kernel_node);

void LaunchSplit(T *input, T **output, size_t size);
int64_t axis_{1};
int64_t output_num_{1};
int64_t axis_step_{1};

size_t input_size_{1};
size_t dims_after_axis_{1};
size_t dims_current_after_axis_{1};
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

void InitInputOutputSize(const CNodePtr &kernel_node) override;

std::vector<std::vector<size_t>> output_shape_list_;
int64_t axis_{0};
size_t output_num_{1};
std::vector<int> input_shape_;
TypeId dtype_{kTypeUnknown};
};

MS_REG_CPU_KERNEL_T(Split, KernelAttr(), SplitCPUKernel, float);


+ 33
- 32
mindspore/ccsrc/backend/kernel_compiler/cpu/stridedslice_cpu_kernel.cc View File

@@ -24,21 +24,25 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kStridedSliceInputsNum = 1;
constexpr size_t kStridedSliceOutputsNum = 1;
} // namespace

enum PosType { kBegin, kEnd };

int NormalizePos(int pos, int dim_len, PosType pos_type) {
if (pos < 0) {
int normal_pos = pos + dim_len;
int threshold = pos_type == kBegin ? 0 : -1;
normal_pos = std::max(normal_pos, threshold);
return normal_pos;
if (pos >= 0) {
int max_pos = pos_type == kBegin ? dim_len - 1 : dim_len;
return std::min(pos, max_pos);
}
int max_pos = pos_type == kBegin ? dim_len - 1 : dim_len;
return std::min(pos, max_pos);
int min_pos = pos_type == kBegin ? 0 : -1;
return std::max(pos + dim_len, min_pos);
}

void StridedSliceCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (input_shape_.size() > DIMENSION_8D || input_shape_.empty()) {
@@ -70,18 +74,17 @@ bool StridedSliceCPUKernel::MatchParallelPattern() {
// Example 2:
// input shape info: [1, 46, 40]
// output shape info: [1, 20, 40]
if (input_shape_.size() != output_shape_.size()) {
return false;
}
std::vector<int> axis_list;
for (size_t i = 0; i < input_shape_.size(); ++i) {
if (input_shape_[i] != output_shape_[i]) {
(void)axis_list.emplace_back(i);
if (input_shape_.size() == output_shape_.size()) {
std::vector<int> axis_list;
for (size_t i = 0; i < input_shape_.size(); ++i) {
if (input_shape_[i] != output_shape_[i]) {
(void)axis_list.emplace_back(i);
}
}
if (axis_list.size() == 1) {
split_axis_ = axis_list.front();
return true;
}
}
if (axis_list.size() == 1) {
split_axis_ = axis_list.front();
return true;
}
return false;
}
@@ -123,8 +126,9 @@ void StridedSliceCPUKernel::InitSliceParam(const std::vector<int64_t> &begin, co
slice_param_.data_type = type_pair->second.first;

for (size_t i = 0; i < DIMENSION_8D; i++) {
int dim_len;
if (i < begin.size()) {
int dim_len = SizeToInt(input_shape_[i]);
dim_len = SizeToInt(input_shape_[i]);
int begin_pos = LongToInt(begin[i]);
int end_pos = LongToInt(end[i]);
int stride_size = LongToInt(stride[i]);
@@ -142,7 +146,7 @@ void StridedSliceCPUKernel::InitSliceParam(const std::vector<int64_t> &begin, co
slice_param_.ends_[i] = slice_param_.begins_[i] - 1;
}
} else if (i < input_shape_.size()) {
int dim_len = SizeToInt(input_shape_[i]);
dim_len = SizeToInt(input_shape_[i]);
slice_param_.in_shape_[i] = dim_len;
slice_param_.begins_[i] = 0;
slice_param_.ends_[i] = dim_len;
@@ -158,10 +162,10 @@ void StridedSliceCPUKernel::InitSliceParam(const std::vector<int64_t> &begin, co
slice_param_.num_axes_ = DIMENSION_8D;
}

int StridedSliceCPUKernel::RunTaskOnOuter(uint8_t *input_addr, uint8_t *output_addr, int start_pos) {
int StridedSliceCPUKernel::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) {
int begin_index = slice_param_.begins_[split_axis_];
int inner_size = inner_ * data_size_;
uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size;
const uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size;
uint8_t *cur_out_ptr = output_addr + start_pos * output_shape_[split_axis_] * inner_size;
int cur_outer = outer_ - start_pos;
if (cur_outer <= 0) {
@@ -173,10 +177,10 @@ int StridedSliceCPUKernel::RunTaskOnOuter(uint8_t *input_addr, uint8_t *output_a
return common::SUCCESS;
}

int StridedSliceCPUKernel::RunTaskOnSplitAxis(uint8_t *input_addr, uint8_t *output_addr, int start_pos) {
int StridedSliceCPUKernel::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) {
int begin_index = slice_param_.begins_[split_axis_];
int inner_size = inner_ * data_size_;
uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size;
const uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size;
uint8_t *cur_out_ptr = output_addr + start_pos * inner_size;
int cal_axis_num = output_shape_[split_axis_] - start_pos;
if (cal_axis_num <= 0) {
@@ -187,10 +191,10 @@ int StridedSliceCPUKernel::RunTaskOnSplitAxis(uint8_t *input_addr, uint8_t *outp
return common::SUCCESS;
}

void StridedSliceCPUKernel::ParallelRun(uint8_t *input_addr, uint8_t *output_addr, int thread_num) {
void StridedSliceCPUKernel::ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num) {
int thread_index = 0;
std::vector<common::Task> tasks;
std::function<int(StridedSliceCPUKernel *, uint8_t *, uint8_t *, int)> execute_func;
std::function<int(StridedSliceCPUKernel *, const uint8_t *, uint8_t *, int)> execute_func;
if (parallel_strategy_ == kOnOuter) {
execute_func = &StridedSliceCPUKernel::RunTaskOnOuter;
} else if (parallel_strategy_ == kOnSplitAxis) {
@@ -208,13 +212,10 @@ void StridedSliceCPUKernel::ParallelRun(uint8_t *input_addr, uint8_t *output_add
}

bool StridedSliceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 1 || outputs.size() != 1) {
MS_LOG(ERROR) << "StridedSlice requires 1 input and 1 output, but got " << inputs.size() << " input and "
<< outputs.size() << " output.";
return false;
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kStridedSliceInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kStridedSliceOutputsNum, kernel_name_);
if (outputs[0]->size == 0) {
MS_LOG(WARNING) << "StridedSlice output memory size should be greater than 0, but got 0.";
return true;


+ 6
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/stridedslice_cpu_kernel.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_STRIDESLICE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_STRIDESLICE_CPU_KERNEL_H_

#include <vector>
#include <memory>
@@ -37,14 +37,13 @@ class StridedSliceCPUKernel : public CPUKernel {

private:
enum ParallelStrategy { kOnSplitAxis, kOnOuter };

void InitSliceParam(const std::vector<int64_t> &begin, const std::vector<int64_t> &end,
const std::vector<int64_t> &stride);
bool MatchParallelPattern();
void InitParallelParam();
void ParallelRun(uint8_t *input_addr, uint8_t *output_addr, int thread_num);
int RunTaskOnOuter(uint8_t *input_addr, uint8_t *output_addr, int start_pos);
int RunTaskOnSplitAxis(uint8_t *input_addr, uint8_t *output_addr, int start_pos);
void ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num);
int RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos);
int RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos);

TypeId dtype_;
int data_size_{4};
@@ -70,4 +69,4 @@ MS_REG_CPU_KERNEL(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat64).Ad
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_STRIDESLICE_CPU_KERNEL_H_

+ 14
- 8
mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.cc View File

@@ -20,8 +20,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSubAndFilterInputsNum = 3;
constexpr size_t kSubAndFilterOutputNum = 2;
} // namespace

void SubAndFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
node_wpt_ = kernel_node;
input_x_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
}
@@ -29,6 +35,8 @@ void SubAndFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool SubAndFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSubAndFilterInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSubAndFilterOutputNum, kernel_name_);
if (input_x_dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (input_x_dtype_ == kNumberTypeInt64) {
@@ -42,11 +50,9 @@ bool SubAndFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs
template <typename T>
void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
auto node_ = node_wpt_.lock();
if (!node_) {
MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
}
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0);
auto node = node_wpt_.lock();
MS_EXCEPTION_IF_NULL(node);
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);

batch_size_ = 1;
for (size_t i = 0; i < indices_shape.size(); ++i) {
@@ -71,12 +77,12 @@ void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
MS_LOG(INFO) << "SubAndFilter output count is " << count;
std::vector<size_t> out_shape;
(void)out_shape.emplace_back(count);
size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
size_t output_num = AnfAlgo::GetOutputTensorNum(node);
std::vector<TypeId> dtypes(output_num);
for (size_t i = 0; i < output_num; i++) {
dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node, i);
}
AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node_.get());
AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node.get());
}
} // namespace kernel
} // namespace mindspore

+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.h View File

@@ -35,10 +35,10 @@ class SubAndFilterCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);

private:
size_t batch_size_{1};
TypeId input_x_dtype_{kTypeUnknown};
CNodeWeakPtr node_wpt_;


+ 8
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/tensor_copy_slices_cpu_kernel.cc View File

@@ -23,8 +23,14 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kTensorCopySlicesInputsNum = 2;
constexpr size_t kTensorCopySlicesOutputsNum = 1;
} // namespace

void TensorCopySlicesCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
@@ -48,11 +54,8 @@ void TensorCopySlicesCPUKernel::InitKernel(const CNodePtr &kernel_node) {
bool TensorCopySlicesCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 2 || outputs.size() != 1) {
MS_LOG(ERROR) << "TensorCopySlices requires 1 input and 1 output, but got " << inputs.size() << " input and "
<< outputs.size() << " output.";
return false;
}
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTensorCopySlicesInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTensorCopySlicesOutputsNum, kernel_name_);

auto input_addr = reinterpret_cast<uint8_t *>(inputs[0]->addr);
auto update_addr = reinterpret_cast<uint8_t *>(inputs[1]->addr);


+ 9
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,9 +20,15 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kTensorAddInputsSize = 2;
constexpr size_t kTensorAddOutputsSize = 1;
} // namespace

template <typename T>
void TensorAddCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
// Init shape ans strides
input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -33,6 +39,8 @@ template <typename T>
bool TensorAddCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTensorAddInputsSize, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTensorAddOutputsSize, kernel_name_);
T *input_addr_a = reinterpret_cast<T *>(inputs[0]->addr);
T *input_addr_b = reinterpret_cast<T *>(inputs[1]->addr);
T *output_addr = reinterpret_cast<T *>(outputs[0]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.


+ 15
- 23
mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc View File

@@ -20,10 +20,15 @@

namespace mindspore {
namespace kernel {
void TileCPUKernel::TileMultipleCompute(void) {
namespace {
constexpr size_t kTileInputsNum = 1;
constexpr size_t kTileOutputsNum = 1;
} // namespace

void TileCPUKernel::TileMultipleCompute() {
int large_one_multiple_count_ = 0;
int multiple = 0;
int mul_index = 0;
size_t mul_index = 0;
for (size_t i = 0; i < multiples_.size(); i++) {
tile_parameter_.multiples_[i] = multiples_[i];
if (tile_parameter_.multiples_[i] > 1) {
@@ -47,6 +52,10 @@ void TileCPUKernel::TileMultipleCompute(void) {
void TileCPUKernel::TileTensorParamrInit(const CNodePtr &kernel_node) {
x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
y_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (x_shape_.size() > MAX_TILE_DIM_SIZE || x_shape_.size() > y_shape_.size()) {
MS_LOG(EXCEPTION) << "Tile input shape should not be greater than default max size :" << MAX_TILE_DIM_SIZE
<< " and output shape : " << y_shape_.size() << ", but got input shape " << x_shape_.size();
}
std::vector<int64_t> multiples_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "multiples");
(void)std::transform(multiples_me.begin(), multiples_me.end(), std::back_inserter(multiples_),
[](const int64_t &value) { return LongToInt(value); });
@@ -54,17 +63,9 @@ void TileCPUKernel::TileTensorParamrInit(const CNodePtr &kernel_node) {
size_t ones = multiples_.size() - x_shape_.size();
if (ones > 0) {
for (size_t i = 0; i < ones; ++i) {
x_shape_.insert(x_shape_.begin(), 1);
(void)x_shape_.insert(x_shape_.begin(), 1);
}
}
if (x_shape_.size() > MAX_TILE_DIM_SIZE) {
MS_LOG(EXCEPTION) << "Input shape size should not greater than " << MAX_TILE_DIM_SIZE << ", but got "
<< x_shape_.size();
}
if (y_shape_.size() < x_shape_.size()) {
MS_LOG(EXCEPTION) << "Output shape size should not less than input shape size, but got output shape: " << y_shape_
<< ", input shape: " << x_shape_;
}

input_size_ = 1;
tile_parameter_.in_dim_ = x_shape_.size();
@@ -88,7 +89,7 @@ void TileCPUKernel::TileTensorParamrInit(const CNodePtr &kernel_node) {

void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
CheckParam(kernel_node);
kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
TileTensorParamrInit(kernel_node);

launch_map_[kNumberTypeInt8] = &TileCPUKernel::LaunchKernel<int8_t>;
@@ -112,6 +113,8 @@ void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) {

// Entry point invoked by the CPU kernel runtime. Validates the actual
// input/output address counts against the expected Tile arity (1 in, 1 out),
// then dispatches to the type-specialized implementation selected in InitKernel.
// The unnamed middle parameter is the (unused) workspace address list.
bool TileCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
// Macro checks raise an exception with kernel_name_ on arity mismatch.
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTileInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTileOutputsNum, kernel_name_);
// launch_func_ is a member-function pointer bound to LaunchKernel<T> for the
// input dtype (set up in InitKernel); it performs the actual tiling.
launch_func_(this, inputs, outputs);
return true;
}
@@ -132,16 +135,5 @@ void TileCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st

Tile(x_addr, y_addr, &tile_parameter_);
}

void TileCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but TileCPUKernel needs 1 input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but TileCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+ 3
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.h View File

@@ -36,6 +36,7 @@ class TileCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

@@ -43,8 +44,6 @@ class TileCPUKernel : public CPUKernel {

void TileMultipleCompute(void);

private:
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> x_shape_;
std::vector<size_t> y_shape_;
std::vector<int> multiples_;
@@ -54,8 +53,8 @@ class TileCPUKernel : public CPUKernel {
std::unordered_map<TypeId, TypeKernel> launch_map_;
TypeKernel launch_func_;
TileParameter tile_parameter_;
bool one_dim_tile_;
size_t input_size_;
bool one_dim_tile_{false};
size_t input_size_{0};
};

MS_REG_CPU_KERNEL(Tile, KernelAttr(), TileCPUKernel);


+ 9
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/topk_cpu_kernel.cc View File

@@ -21,6 +21,11 @@

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kTopKInputsNum = 2;
constexpr size_t kTopKOutputsNum = 2;
} // namespace

template <typename T>
void TopKCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces,
const std::vector<AddressPtr> &outputs) {
@@ -87,8 +92,8 @@ void TopKCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st
void TopKCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
auto x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (x_shape_.size() < 1) {
MS_LOG(EXCEPTION) << "Input shape size should not less than 1";
if (x_shape_.empty()) {
MS_LOG(EXCEPTION) << "Input shape is empty";
}
for (size_t i = 0; i < x_shape_.size() - 1; ++i) {
outer_size_ *= x_shape_[i];
@@ -107,6 +112,8 @@ void TopKCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
bool TopKCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspaces,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTopKInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTopKOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat16) {
LaunchKernel<float16>(inputs, workspaces, outputs);
} else if (dtype_ == kNumberTypeFloat32) {


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save