Browse Source

!22236 clean code in cpu operator

Merge pull request !22236 from liangxhao/master_cleancode
tags/v1.5.0-rc1
i-robot Gitee 4 years ago
parent
commit
e06e83607e
37 changed files with 114 additions and 91 deletions
  1. +5
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc
  2. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h
  3. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc
  4. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h
  5. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc
  6. +1
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc
  7. +0
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc
  8. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/depthtospace_cpu_kernel.cc
  9. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/depthtospace_cpu_kernel.h
  10. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc
  11. +2
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h
  12. +6
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/l2_normalize_cpu_kernel.cc
  13. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/l2_normalize_cpu_kernel.h
  14. +5
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.cc
  15. +7
    -5
      mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.cc
  16. +2
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.h
  17. +12
    -10
      mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.cc
  18. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.h
  19. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_cpu_kernel.cc
  20. +7
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc
  21. +2
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/print_cpu_kernel.cc
  22. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.h
  23. +0
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/rmsprop_cpu_kernel.cc
  24. +5
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.cc
  25. +3
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h
  26. +6
    -6
      mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
  27. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
  28. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.cc
  29. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.h
  30. +7
    -7
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc
  31. +5
    -0
      mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.h
  32. +4
    -4
      mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
  33. +4
    -2
      mindspore/ccsrc/backend/kernel_compiler/cpu/strided_slice_grad_cpu_kernel.cc
  34. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/strided_slice_grad_cpu_kernel.h
  35. +1
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/tensor_copy_slices_cpu_kernel.cc
  36. +3
    -1
      mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc
  37. +1
    -1
      model_zoo/official/cv/centerface/ascend310_infer/src/main.cc

+ 5
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc View File

@@ -406,7 +406,7 @@ void ArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
input_shape2_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (output_shape_.size() == 0) {
output_shape_.insert(output_shape_.begin(), 1);
(void)output_shape_.insert(output_shape_.begin(), 1);
}

output_size_ = 1;
@@ -426,11 +426,11 @@ void ArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

size_t l = input_shape1_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape1_.insert(input_shape1_.begin(), 1);
(void)input_shape1_.insert(input_shape1_.begin(), 1);
}
l = input_shape2_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape2_.insert(input_shape2_.begin(), 1);
(void)input_shape2_.insert(input_shape2_.begin(), 1);
}
CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_);
CPUKernelUtils::GetElementNumEveryDim(input_shape2_, &input_element_num2_);
@@ -443,7 +443,8 @@ void ArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
}

template <typename T>
bool ArithmeticCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
bool ArithmeticCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> & /* workspace */,
const std::vector<AddressPtr> &outputs) {
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h View File

@@ -58,7 +58,7 @@ class ArithmeticCPUKernel : public CPUKernel {
std::vector<size_t> input_element_num2_;
std::vector<size_t> output_shape_;
std::vector<size_t> output_element_num_;
size_t output_size_;
size_t output_size_{1};
ArithmeticParameter op_para;
OperateType operate_type_{ADD};
TypeId dtype_{kTypeUnknown};


+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc View File

@@ -177,7 +177,7 @@ void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
input_shape2_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (output_shape_.size() == 0) {
output_shape_.insert(output_shape_.begin(), 1);
(void)output_shape_.insert(output_shape_.begin(), 1);
}

output_size_ = 1;
@@ -187,11 +187,11 @@ void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

size_t l = input_shape1_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape1_.insert(input_shape1_.begin(), 1);
(void)input_shape1_.insert(input_shape1_.begin(), 1);
}
l = input_shape2_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape2_.insert(input_shape2_.begin(), 1);
(void)input_shape2_.insert(input_shape2_.begin(), 1);
}
CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_);
CPUKernelUtils::GetElementNumEveryDim(input_shape2_, &input_element_num2_);
@@ -205,7 +205,7 @@ void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

template <typename T>
bool ArithmeticLogicCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> & /* workspace */,
const std::vector<AddressPtr> &outputs) {
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h View File

@@ -52,7 +52,7 @@ class ArithmeticLogicCPUKernel : public CPUKernel {
std::vector<size_t> input_element_num2_;
std::vector<size_t> output_shape_;
std::vector<size_t> output_element_num_;
size_t output_size_;
size_t output_size_{1};
OperateType operate_type_{ADD};
TypeId dtype_{kTypeUnknown};
TypeId target_dtype_{kTypeUnknown};


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc View File

@@ -264,7 +264,7 @@ void Atanh(const T *in, T *out, size_t size) {

template <typename T>
void Identity(const T *in, T *out, size_t size) {
std::copy(in, in + size, out);
(void)std::copy(in, in + size, out);
}
} // namespace



+ 1
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc View File

@@ -26,7 +26,6 @@ void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
size_t input_shape_size = input_shape_.size();
size_t output_shape_size = output_shape_.size();

if (output_shape_size < input_shape_size) {
MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_
<< " to a smaller dimension shape " << output_shape_ << ".";
@@ -68,7 +67,7 @@ bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, cons

const auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
int ret = NNACL_ERR;
int ret = static_cast<int>(NNACL_ERR);
if constexpr (std::is_same_v<T, bool>) {
ret = BroadcastTo(bool, input_addr, &shape_info_, output_addr);
} else if constexpr (std::is_same_v<T, int>) {
@@ -86,6 +85,5 @@ bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, cons
<< " execute failed.";
return false;
}

} // namespace kernel
} // namespace mindspore

+ 0
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc View File

@@ -370,6 +370,5 @@ std::vector<size_t> CPUKernelUtils::GetBroadcastShape(const std::vector<size_t>
}
return broadcast_shape;
}

} // namespace kernel
} // namespace mindspore

+ 3
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/depthtospace_cpu_kernel.cc View File

@@ -28,16 +28,16 @@ void DepthToSpaceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
block_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size");
block_size_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size"));
}

template <typename T>
bool DepthToSpaceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
size_t size = IntToSize(inputs[0]->size / sizeof(T));
size_t size = inputs[0]->size / sizeof(T);
std::vector<size_t> input_shape = input_shape_;
std::vector<size_t> output_shape = output_shape_;
size_t block_size = block_size_;


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/depthtospace_cpu_kernel.h View File

@@ -37,7 +37,7 @@ class DepthToSpaceCPUKernel : public CPUKernel {
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> input_shape_;
std::vector<size_t> output_shape_;
size_t block_size_;
size_t block_size_{0};
};

MS_REG_CPU_KERNEL_T(


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc View File

@@ -246,7 +246,7 @@ bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inpu
{prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel<T>::AsinhGrad},
{prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel<T>::AcoshGrad},
{prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel<T>::SoftplusGrad}};
if (inputs.size() < 2 || outputs.size() != 1) {
if (inputs.size() < kInputMinNum || outputs.size() != kOutputNum) {
MS_LOG(ERROR) << kernel_name_ << " requires at least 2 inputs and 1 output, but got " << inputs.size()
<< " inputs and " << outputs.size() << " output.";
return false;


+ 2
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h View File

@@ -24,6 +24,8 @@

namespace mindspore {
namespace kernel {
constexpr size_t kInputMinNum = 2;
constexpr size_t kOutputNum = 1;
template <typename T>
class EltWiseGradCPUKernel : public CPUKernel {
public:


+ 6
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/l2_normalize_cpu_kernel.cc View File

@@ -39,18 +39,19 @@ void L2NormalizeCPUKernel<T>::CalcDenominator(const T *input_addr, const size_t
size_t stride = 1;
std::vector<size_t> axes(input_shape_.size());
int k = 0;
for (int i = 0; i < dims; ++i) {
if (i != axis_) {
size_t axis_size = IntToSize(axis_);
for (size_t i = 0; i < IntToSize(dims); ++i) {
if (i != axis_size) {
axes[k] = i;
++k;
} else {
stride *= input_shape_[i];
}
}
axes[k] = axis_;
axes[k] = axis_size;

std::vector<size_t> transpose_shape(input_shape_.size());
for (int i = 0; i < dims; ++i) {
for (size_t i = 0; i < IntToSize(dims); ++i) {
transpose_shape[i] = input_shape_[axes[i]];
}

@@ -109,7 +110,7 @@ void L2NormalizeCPUKernel<T>::CalcOutput(const T *input_addr, const std::vector<

template <typename T>
bool L2NormalizeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/l2_normalize_cpu_kernel.h View File

@@ -46,7 +46,7 @@ class L2NormalizeCPUKernel : public CPUKernel {
std::vector<size_t> input_shape_;
std::vector<size_t> output_shape_;
T epsilon_;
int axis_;
int axis_{0};
void CheckParam(const CNodePtr &kernel_node);
};



+ 5
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.cc View File

@@ -31,7 +31,7 @@ void L2NormalizeGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

int output_dim_length = output_shape.size();
dim_elem_num_list_.resize(output_dim_length, 1);
for (int i = output_dim_length - 2; i >= 0; i--) {
for (int i = output_dim_length - 2; i >= 0; i--) { // from -2 to 0 dim
dim_elem_num_list_[i] = output_shape[i + 1] * dim_elem_num_list_[i + 1];
}

@@ -138,14 +138,15 @@ void L2NormalizeGradCPUKernel<T>::GetSumOfProduct(const std::vector<T> &x_vector
for (size_t i = 0; i < len; i++) {
tmp_vector[i] = x_vector[i] * y_vector[i];
}
if (len % 2 == 1) {
const size_t half = 2;
if (len % half == 1) {
tmp_vector[0] += tmp_vector[len - 1];
}
for (size_t stride = len / 2; stride > 0; stride >>= 1) {
for (size_t stride = len / half; stride > 0; stride >>= 1) {
for (size_t i = 0; i < stride; i++) {
tmp_vector[i] += tmp_vector[i + stride];
}
if (stride > 2 && stride % 2 == 1) {
if (stride > half && stride % half == 1) {
tmp_vector[0] += tmp_vector[stride - 1];
}
}


+ 7
- 5
mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.cc View File

@@ -23,12 +23,14 @@ namespace kernel {
template <typename T>
void MaskedSelectCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectCPUKernel needs 2 input.";
if (input_num != kInputNum) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectCPUKernel needs " << kInputNum
<< " input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectCPUKernel needs 1 output.";
if (output_num != kOutputNum) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectCPUKernel needs " << kOutputNum
<< " output.";
}
input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -69,7 +71,7 @@ bool MaskedSelectCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp
MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
}
std::vector<size_t> out_shape;
out_shape.emplace_back(j);
(void)out_shape.emplace_back(j);
size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
std::vector<TypeId> dtypes(output_num);
for (size_t i = 0; i < output_num; i++) {


+ 2
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.h View File

@@ -24,6 +24,8 @@

namespace mindspore {
namespace kernel {
constexpr size_t kInputNum = 2;
constexpr size_t kOutputNum = 1;
template <typename T>
class MaskedSelectCPUKernel : public CPUKernel {
public:


+ 12
- 10
mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.cc View File

@@ -23,16 +23,18 @@ namespace kernel {
template <typename T>
void MaskedSelectGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectGradCPUKernel needs 3 input.";
if (input_num != kInputNum) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectGradCPUKernel needs " << kInputNum
<< " input.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectGradCPUKernel needs 1 output.";
if (output_num != kOutputNum) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectGradCPUKernel needs " << kOutputNum
<< " output.";
}
input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, INPUT);
input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, MASK);
grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, GRAD);
output_shape_ = CPUKernelUtils::GetBroadcastShape(input_shape_a_, input_shape_b_);
for (const uint64_t &d : output_shape_) {
tensor_size_ *= d;
@@ -43,9 +45,9 @@ template <typename T>
bool MaskedSelectGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
auto mask = reinterpret_cast<bool *>(inputs[1]->addr);
auto grad = reinterpret_cast<T *>(inputs[2]->addr);
auto dx = reinterpret_cast<T *>(outputs[0]->addr);
auto mask = reinterpret_cast<bool *>(inputs[MASK]->addr);
auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
auto dx = reinterpret_cast<T *>(outputs[INPUT]->addr);

auto ret = memset_s(dx, outputs[0]->size, 0, outputs[0]->size);
if (ret != EOK) {


+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.h View File

@@ -24,6 +24,8 @@

namespace mindspore {
namespace kernel {
constexpr size_t kInputNum = 3;
constexpr size_t kOutputNum = 1;
template <typename T>
class MaskedSelectGradCPUKernel : public CPUKernel {
public:
@@ -41,6 +43,7 @@ class MaskedSelectGradCPUKernel : public CPUKernel {
std::vector<size_t> grad_shape_;
std::vector<size_t> output_shape_;
uint64_t tensor_size_ = 1;
enum input_list_ { INPUT, MASK, GRAD };
};

MS_REG_CPU_KERNEL_T(MaskedSelectGrad,


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_cpu_kernel.cc View File

@@ -35,7 +35,7 @@ void BatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
momentum = AnfAlgo::GetNodeAttr<float>(kernel_node, "momentum");
std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (x_shape.size() == 2) {
x_shape.insert(x_shape.end(), 2, 1);
x_shape.insert(x_shape.end(), 2, 1); // expand 2 dim: NC -> NCHW
} else if (x_shape.size() != 4) {
MS_LOG(EXCEPTION) << "Batchnorm only support nchw input!";
}


+ 7
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc View File

@@ -23,6 +23,7 @@ namespace mindspore {
namespace kernel {
const int kMaxLSTMLayer = 100;
const int kOutputWorkSpaceIndex = 3;
const size_t kGateNum = 4;
void LstmCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
output_size_list_[kOutputWorkSpaceIndex] = reserve_size_;
@@ -31,10 +32,10 @@ void LstmCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
auto output_types = std::vector<TypeId>(output_num, output_type);
std::vector<std::vector<size_t>> output_shapes;
for (size_t output_index = 0; output_index < output_num; ++output_index) {
std::vector<size_t> shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index);
auto shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index);
output_shapes.emplace_back(shape);
}
size_t len = reserve_size_ / 4;
size_t len = reserve_size_ / kGateNum;
output_shapes[kOutputWorkSpaceIndex] = {len, 1};
AnfAlgo::SetOutputInferTypeAndShape(output_types, output_shapes, kernel_node.get());
}
@@ -56,9 +57,9 @@ void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_};
weights_dims_ = {num_layers_, num_directions_, input_size_, kGateNum, hidden_size_};
weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, kGateNum, hidden_size_};
bias_dims_ = {num_layers_, num_directions_, kGateNum, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, static_cast<int64_t>(hidden_size_) * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
@@ -115,7 +116,7 @@ void LstmCPUKernel::CheckParam(const CNodePtr &kernel_node) {
if (bidirectional_) {
num_directions_ = 2;
}
const int gate_size = 4 * hidden_size_;
const int gate_size = kGateNum * hidden_size_;
if (num_layers_ <= 0) {
MS_LOG(EXCEPTION) << "Layers must be greater than zero!";
}


+ 2
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/print_cpu_kernel.cc View File

@@ -38,8 +38,8 @@ void PrintCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

template <typename T>
bool PrintCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /*outputs*/) {
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> & /* outputs */) {
auto data_type = CheckType();
if (data_type == kTypeUnknown) {
MS_LOG(EXCEPTION) << "CPU print does not support the input type.";


+ 3
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.h View File

@@ -41,9 +41,9 @@ class ResizeBilinearCPUKernel : public CPUKernel {
private:
void CheckParam(const CNodePtr &kernel_node);
TypeId dtype_{kTypeUnknown};
bool align_corners_ = false;
float height_scale;
float width_scale;
bool align_corners_{false};
float height_scale{1.0};
float width_scale{1.0};
std::vector<int64_t> size_;
std::vector<size_t> shape_;
};


+ 0
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/rmsprop_cpu_kernel.cc View File

@@ -117,6 +117,5 @@ bool RMSPropCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
}
return true;
}

} // namespace kernel
} // namespace mindspore

+ 5
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.cc View File

@@ -20,7 +20,6 @@

namespace mindspore {
namespace kernel {

template <typename T>
void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
@@ -43,11 +42,11 @@ template <typename T>
void ScatterArithmeticCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) const {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
if (input_num != kInputNum) {
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterAdd needs 3 inputs.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
if (output_num != kOutputNum) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterAdd has 1 output.";
}
}
@@ -65,9 +64,9 @@ bool ScatterArithmeticCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr>
{"ScatterMin", &ScatterArithmeticCPUKernel<T>::ScatterMin},
{"ScatterUpdate", &ScatterArithmeticCPUKernel<T>::ScatterUpdate}};
if (kScatterArithmeticBinOpFuncMap.find(kernel_name_) != kScatterArithmeticBinOpFuncMap.end()) {
T *input = reinterpret_cast<T *>(inputs[0]->addr);
int *indices = reinterpret_cast<int *>(inputs[1]->addr);
T *updates = reinterpret_cast<T *>(inputs[2]->addr);
T *input = reinterpret_cast<T *>(inputs[INPUT]->addr);
int *indices = reinterpret_cast<int *>(inputs[INDICES]->addr);
T *updates = reinterpret_cast<T *>(inputs[UPDATES]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
kScatterArithmeticBinOpFuncMap.at(kernel_name_)(this, input, indices, updates);
auto bufferSize = outputs[0]->size;


+ 3
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h View File

@@ -22,6 +22,8 @@

namespace mindspore {
namespace kernel {
constexpr size_t kInputNum = 3;
constexpr size_t kOutputNum = 1;
template <typename T>
class ScatterArithmeticCPUKernel : public CPUKernel {
public:
@@ -55,6 +57,7 @@ class ScatterArithmeticCPUKernel : public CPUKernel {
size_t inner_size_{0};
size_t indices_size_{0};
std::string kernel_name_;
enum input_list_ { INPUT, INDICES, UPDATES };
};

MS_REG_CPU_KERNEL_T(ScatterAdd,


+ 6
- 6
mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc View File

@@ -51,12 +51,12 @@ bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::v
const std::vector<AddressPtr> &outputs) {
CheckParam(inputs, outputs);

auto param = reinterpret_cast<T *>(inputs[0]->addr);
auto grad = reinterpret_cast<T *>(inputs[1]->addr);
auto lr = reinterpret_cast<T *>(inputs[2]->addr);
auto accum = reinterpret_cast<T *>(inputs[3]->addr);
auto momentum = reinterpret_cast<T *>(inputs[4]->addr);
auto stat = reinterpret_cast<T *>(inputs[5]->addr);
auto param = reinterpret_cast<T *>(inputs[PARAM]->addr);
auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
auto lr = reinterpret_cast<T *>(inputs[LR]->addr);
auto accum = reinterpret_cast<T *>(inputs[ACCUM]->addr);
auto momentum = reinterpret_cast<T *>(inputs[MOMENTUM]->addr);
auto stat = reinterpret_cast<T *>(inputs[STAT]->addr);
auto output_param = reinterpret_cast<T *>(outputs[0]->addr);
size_t elem_num = inputs[0]->size / sizeof(T);



+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h View File

@@ -39,6 +39,7 @@ class SGDCPUKernel : public CPUKernel {
float dampening_{0.0};
float weight_decay_{0.0};
bool nesterov_{true};
enum input_list_ { PARAM, GRAD, LR, ACCUM, MOMENTUM, STAT };
};

MS_REG_CPU_KERNEL_T(SGD,


+ 3
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.cc View File

@@ -29,16 +29,16 @@ void SpaceToDepthCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
block_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size");
block_size_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size"));
}

template <typename T>
bool SpaceToDepthCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
size_t size = IntToSize(inputs[0]->size / sizeof(T));
size_t size = inputs[0]->size / sizeof(T);

std::vector<size_t> input_shape = input_shape_;
std::vector<size_t> output_shape = output_shape_;


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.h View File

@@ -36,7 +36,7 @@ class SpaceToDepthCPUKernel : public CPUKernel {
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> input_shape_;
std::vector<size_t> output_shape_;
size_t block_size_;
size_t block_size_{0};
};

MS_REG_CPU_KERNEL_T(


+ 7
- 7
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.cc View File

@@ -24,14 +24,14 @@ template <typename I, typename T>
void SparseTensorDenseMatmulCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
adj_st_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST);
adj_dt_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT);
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (indices_shape.size() != 2 && indices_shape[1] != 2) {
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES);
if (indices_shape.size() != kIndicesSizeNum && indices_shape[1] != kIndices2rdDimNum) {
MS_LOG(EXCEPTION)
<< "SparseTensorDenseMatmul requires 'indices' should be a 2-D Tensor and the second dimension length "
"should be 2, but got 'indices' shape: "
<< indices_shape;
}
auto values_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto values_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
MS_LOG(EXCEPTION)
<< "SparseTensorDenseMatmul requires 'value's should be a 1-D Tensor and the first dimension length should be "
@@ -40,14 +40,14 @@ void SparseTensorDenseMatmulCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_n
}
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
values_size_ = values_shape[0];
b_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
b_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, DENSE);
}

template <typename I, typename T>
bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 4 || outputs.size() != 1) {
if (inputs.size() != kInputNum || outputs.size() != kOutputNum) {
MS_LOG(ERROR) << "SparseTensorDenseMatmul requires 4 inputs and 1 output, but got " << inputs.size()
<< " inputs and " << outputs.size() << " output.";
return false;
@@ -74,7 +74,7 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
const size_t same_dim = adj_dt_ ? b_dim_1 : b_dim_0;

for (size_t i = 0; i < values_size_; ++i) {
if (i * 2 + 1 >= indices_length) {
if (i * 2 + 1 >= indices_length) { // the interval is 2
MS_LOG(EXCEPTION) << "The index of a_indices out of bounds.";
}
if (i >= values_length) {


+ 5
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_tensor_dense_matmul_cpu_kernel.h View File

@@ -23,6 +23,10 @@

namespace mindspore {
namespace kernel {
constexpr size_t kInputNum = 4;
constexpr size_t kOutputNum = 1;
constexpr size_t kIndicesSizeNum = 2;
constexpr size_t kIndices2rdDimNum = 2;
template <typename I, typename T>
class SparseTensorDenseMatmulCPUKernel : public CPUKernel {
public:
@@ -41,6 +45,7 @@ class SparseTensorDenseMatmulCPUKernel : public CPUKernel {
size_t values_size_{0};
bool adj_st_{false};
bool adj_dt_{false};
enum input_list_ { INDICES, VALUES, SPARSE_SHAPE, DENSE };
};

MS_REG_CPU_KERNEL_T_S(SparseTensorDenseMatmul,


+ 4
- 4
mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc View File

@@ -33,7 +33,7 @@ void SplitCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
template <typename T>
void SplitCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
workspace_size_list_.emplace_back((sizeof(T *) * output_num_));
(void)workspace_size_list_.emplace_back((sizeof(T *) * static_cast<size_t>(output_num_)));
}

template <typename T>
@@ -45,12 +45,12 @@ bool SplitCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
}

template <typename T>
void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t size) {
void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t /* size */) {
SplitParameter param;
param.num_split_ = output_num_;
param.split_dim_ = axis_;
param.strides_[input_shape_.size() - 1] = 1;
for (int i = input_shape_.size() - 2; i >= 0; i--) {
for (int i = input_shape_.size() - 2; i >= 0; i--) { // from -2 to 0 dim
param.strides_[i] = param.strides_[i + 1] * input_shape_[i + 1];
}
auto split_sizes = std::make_unique<int[]>(param.num_split_);
@@ -103,7 +103,7 @@ void SplitCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
if (axis_ < 0) {
axis_ += SizeToInt(input_shape_.size());
}
if (output_num_ > SizeToInt(input_shape_[axis_])) {
if (output_num_ > IntToLong(input_shape_[axis_])) {
MS_LOG(EXCEPTION) << "Attr output_num " << output_num_ << " must less than " << input_shape_[axis_];
}
if (output_num_ != output_num) {


+ 4
- 2
mindspore/ccsrc/backend/kernel_compiler/cpu/strided_slice_grad_cpu_kernel.cc View File

@@ -23,8 +23,10 @@
namespace mindspore {
namespace kernel {
void StridedSliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
// CheckParam(kernel_node);
param_ = (struct StridedSliceParameter *)malloc(sizeof(struct StridedSliceParameter));
if (param_ == nullptr) {
MS_LOG(ERROR) << "malloc StridedSliceGradParameter failed.";
}
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
switch (dtype_) {
@@ -91,7 +93,7 @@ void StridedSliceGradCPUKernel::ExpandAllMemberDims() {
}

bool StridedSliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
bool ret{true};
if (dtype_ == kNumberTypeFloat32) {


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/strided_slice_grad_cpu_kernel.h View File

@@ -43,7 +43,7 @@ class StridedSliceGradCPUKernel : public CPUKernel {
std::vector<int> input_shape_;
std::vector<size_t> output_shape_;
TypeId dtype_{kTypeUnknown};
StridedSliceParameter *param_;
StridedSliceParameter *param_{nullptr};
};

MS_REG_CPU_KERNEL(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),


+ 1
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/tensor_copy_slices_cpu_kernel.cc View File

@@ -46,7 +46,7 @@ void TensorCopySlicesCPUKernel::InitKernel(const CNodePtr &kernel_node) {
}

bool TensorCopySlicesCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /* workspace */,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() != 2 || outputs.size() != 1) {
MS_LOG(ERROR) << "TensorCopySlices requires 1 input and 1 output, but got " << inputs.size() << " input and "


+ 3
- 1
mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc View File

@@ -111,7 +111,9 @@ void TileCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st
tile_parameter_.data_size_ = sizeof(T);

if (one_dim_tile_) {
auto task = [&](size_t start, size_t end) { TileSimple(x_addr, y_addr, start, end, &tile_parameter_); };
auto task = [&x_addr, &y_addr, this](size_t start, size_t end) {
TileSimple(x_addr, y_addr, start, end, &tile_parameter_);
};
ParallelLaunchAutoSearch(task, tile_parameter_.fast_outer_size_, this, &parallel_search_info_);
return;
}


+ 1
- 1
model_zoo/official/cv/centerface/ascend310_infer/src/main.cc View File

@@ -67,7 +67,7 @@ DEFINE_int32(image_width, 832, "image width");

int Resize_Affine(const MSTensor &input, MSTensor *output) {
int new_height, new_width;
float scale = 0.999;
const float scale = 0.999;
auto imgResize = MSTensor();
std::vector<int64_t> shape = input.Shape();
new_height = static_cast<int>(shape[0] * scale);


Loading…
Cancel
Save