
!17111 clean pclint for cpu operator

From: @zhao_ting_v
Reviewed-by: @liangchenghui,@oacjiewen
Signed-off-by: @liangchenghui
tags/v1.3.0
mindspore-ci-bot (Gitee) committed 4 years ago · commit d59fa1c15f
23 changed files with 76 additions and 74 deletions
  1. +1  -2   mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc
  2. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc
  3. +12 -12  mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc
  4. +11 -11  mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc
  5. +2  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
  6. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h
  7. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc
  8. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.cc
  9. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.h
 10. +6  -6   mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc
 11. +9  -10  mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc
 12. +1  -2   mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h
 13. +2  -2   mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_cpu_kernel.cc
 14. +5  -3   mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_grad_cpu_kernel.cc
 15. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_gard_cpu_kernel.cc
 16. +2  -2   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batchmatmul_cpu_kernel.cc
 17. +3  -2   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc
 18. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc
 19. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h
 20. +2  -3   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc
 21. +1  -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h
 22. +7  -5   mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc
 23. +4  -4   mindspore/ccsrc/backend/kernel_compiler/cpu/topk_cpu_kernel.cc

+1 -2   mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc

@@ -103,8 +103,7 @@ void AdamDeltaCPUKernel::CheckParams(const std::vector<kernel::AddressPtr> &inpu
   }
 }
 
-bool AdamDeltaCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                const std::vector<kernel::AddressPtr> &workspace,
+bool AdamDeltaCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
                                 const std::vector<kernel::AddressPtr> &outputs) {
   CheckParams(inputs, outputs);
   auto m = reinterpret_cast<float *>(inputs[0]->addr);
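The hunk above drops only the name of the unused `workspace` parameter, not the parameter itself, so the override still matches the base-class signature while silencing PC-lint's unused-parameter warning. A minimal sketch of the pattern (the `Kernel`/`AddKernel` types are hypothetical, not MindSpore code):

```cpp
#include <vector>

struct Kernel {
  virtual bool Launch(const std::vector<float> &inputs, const std::vector<float> &workspace,
                      std::vector<float> &outputs) = 0;
  virtual ~Kernel() = default;
};

struct AddKernel : Kernel {
  // The workspace vector is unused here, so its name is omitted.
  bool Launch(const std::vector<float> &inputs, const std::vector<float> &,
              std::vector<float> &outputs) override {
    outputs.assign(inputs.begin(), inputs.end());
    return true;
  }
};
```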


+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc

@@ -52,7 +52,7 @@ void ArgmaxCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   size_t shape_len = shape_.size();
   int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
-  axis += shape_len;
+  axis += SizeToLong(shape_len);
   if (axis < 0) {
     MS_LOG(EXCEPTION) << "Invalid axis:" << axis << ", should in range [-1, " << (shape_len - 1) << "]";
   }


+12 -12  mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc

@@ -23,7 +23,7 @@ namespace mindspore {
 namespace kernel {
 template <typename T>
 void ArithmeticCPUKernel<T>::AssignAdd(T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       out[i] = input1[i] + input2[i];
       input1[i] = out[i];
@@ -34,7 +34,7 @@ void ArithmeticCPUKernel<T>::AssignAdd(T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Add(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       out[i] = input1[i] + input2[i];
     }
@@ -44,7 +44,7 @@ void ArithmeticCPUKernel<T>::Add(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Sub(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       out[i] = input1[i] - input2[i];
     }
@@ -54,7 +54,7 @@ void ArithmeticCPUKernel<T>::Sub(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Mul(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       out[i] = input1[i] * input2[i];
     }
@@ -64,7 +64,7 @@ void ArithmeticCPUKernel<T>::Mul(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::RealDiv(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto dividend = input1[i];
       auto divisor = input2[i];
@@ -89,7 +89,7 @@ void ArithmeticCPUKernel<T>::RealDiv(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Div(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto dividend = input1[i];
       auto divisor = input2[i];
@@ -114,7 +114,7 @@ void ArithmeticCPUKernel<T>::Div(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::FloorDiv(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto dividend = input1[i];
       auto divisor = input2[i];
@@ -139,7 +139,7 @@ void ArithmeticCPUKernel<T>::FloorDiv(const T *input1, const T *input2, T *out)
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Mod(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto x = static_cast<double>(input1[i]);
       auto y = static_cast<double>(input2[i]);
@@ -157,7 +157,7 @@ void ArithmeticCPUKernel<T>::Mod(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::FloorMod(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto x = static_cast<double>(input1[i]);
       auto y = static_cast<double>(input2[i]);
@@ -170,7 +170,7 @@ void ArithmeticCPUKernel<T>::FloorMod(const T *input1, const T *input2, T *out)
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Pow(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto x = static_cast<double>(input1[i]);
       auto y = static_cast<double>(input2[i]);
@@ -182,7 +182,7 @@ void ArithmeticCPUKernel<T>::Pow(const T *input1, const T *input2, T *out) {
 
 template <typename T>
 void ArithmeticCPUKernel<T>::SquaredDifference(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       T diff = input1[i] - input2[i];
       out[i] = diff * diff;
@@ -193,7 +193,7 @@ void ArithmeticCPUKernel<T>::SquaredDifference(const T *input1, const T *input2,
 
 template <typename T>
 void ArithmeticCPUKernel<T>::Atan2(const T *input1, const T *input2, T *out) {
-  auto task = [&](size_t start, size_t end) {
+  auto task = [&input1, &input2, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       out[i] = (T)atan2(static_cast<double>(input1[i]), static_cast<double>(input2[i]));
     }
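Every hunk in this file makes the same change: the blanket `[&]` default capture becomes an explicit capture list, so the lambda's dependencies are visible and PC-lint's rule against default capture modes is satisfied. A minimal sketch of the pattern, with a hypothetical free function standing in for the kernel method and a direct call standing in for `CPUKernelUtils::ParallelFor`:

```cpp
#include <cstddef>

template <typename T>
void Add(const T *input1, const T *input2, T *out, size_t size) {
  // Explicit captures: only the three pointers the loop actually touches.
  auto task = [&input1, &input2, &out](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = input1[i] + input2[i];
    }
  };
  task(0, size);  // the real kernel hands `task` to a parallel-for utility instead
}
```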


+11 -11  mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc

@@ -147,7 +147,7 @@ template <typename T>
 void Asin(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = asin(in[i]);
+      out[i] = static_cast<T>(asin(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -157,7 +157,7 @@ template <typename T>
 void ACos(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = acos(in[i]);
+      out[i] = static_cast<T>(acos(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -167,7 +167,7 @@ template <typename T>
 void Atan(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = atan(in[i]);
+      out[i] = static_cast<T>(atan(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -177,7 +177,7 @@ template <typename T>
 void Sin(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = sin(in[i]);
+      out[i] = static_cast<T>(sin(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -187,7 +187,7 @@ template <typename T>
 void Cos(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = cos(in[i]);
+      out[i] = static_cast<T>(cos(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -197,7 +197,7 @@ template <typename T>
 void Tan(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = tan(in[i]);
+      out[i] = static_cast<T>(tan(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -207,7 +207,7 @@ template <typename T>
 void Sinh(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = sinh(in[i]);
+      out[i] = static_cast<T>(sinh(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -217,7 +217,7 @@ template <typename T>
 void Cosh(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = cosh(in[i]);
+      out[i] = static_cast<T>(cosh(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -227,7 +227,7 @@ template <typename T>
 void Asinh(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = asinh(in[i]);
+      out[i] = static_cast<T>(asinh(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -237,7 +237,7 @@ template <typename T>
 void Acosh(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = acosh(in[i]);
+      out[i] = static_cast<T>(acosh(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
@@ -247,7 +247,7 @@ template <typename T>
 void Atanh(const T *in, T *out, size_t size) {
   auto task = [&in, &out](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      out[i] = atanh(in[i]);
+      out[i] = static_cast<T>(atanh(static_cast<double>(in[i])));
     }
   };
   CPUKernelUtils::ParallelFor(task, size);
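Each hunk here widens the operand to `double`, calls the double-precision libm routine, and casts the result back to `T` explicitly, leaving no implicit conversion for the linter to flag. A compilable sketch of the pattern (using `std::asin` where the diff calls unqualified `asin`; the assumption is that `T` may be a narrower type with no exact overload):

```cpp
#include <cmath>
#include <cstddef>

template <typename T>
void Asin(const T *in, T *out, size_t size) {
  for (size_t i = 0; i < size; i++) {
    // Widen explicitly, compute in double, narrow back explicitly.
    out[i] = static_cast<T>(std::asin(static_cast<double>(in[i])));
  }
}
```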


+2 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc

@@ -127,7 +127,8 @@ std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &
   return std::make_pair(false, 0);
 }
 
-bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) {
+bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr,
+                                                const KernelBuildInfo &kernel_info) const {
   for (size_t i = 0; i < kernel_info.GetInputNum(); ++i) {
     auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetInputAttr(0).first : kernel_attr.GetInputAttr(i).first;
     if (kernel_info.GetInputDeviceType(i) != dtype) {


+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h

@@ -46,7 +46,7 @@ class CPUKernelFactory {
   ~CPUKernelFactory() = default;
   DISABLE_COPY_AND_ASSIGN(CPUKernelFactory)
   std::pair<bool, size_t> CPUKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo &kernel_info);
-  bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info);
+  bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) const;
   std::map<std::string, std::vector<std::pair<KernelAttr, CPUKernelCreator>>> name_to_attr_creator_;
 };
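`CPUKernelSingleAttrCheck` only inspects its arguments without mutating factory state, which is why it can be `const`-qualified. A minimal illustration of the rule, with a hypothetical `Registry` class rather than the factory itself:

```cpp
#include <string>

class Registry {
 public:
  // Read-only query: const-qualified so it can be called on const instances
  // and documents that it never mutates the registry.
  bool Contains(const std::string &name) const { return name == name_; }

 private:
  std::string name_ = "Add";
};
```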



+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc

@@ -32,7 +32,7 @@ bool DebugCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const
   auto output = reinterpret_cast<int *>(outputs[0]->addr);
   size_t elem_num = inputs[0]->size / sizeof(int);
   for (size_t i = 0; i < elem_num; i++) {
-    output[i] = val[i];
+    output[i] = static_cast<int>(val[i]);
   }
 
   return true;


+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.cc

@@ -55,7 +55,7 @@ bool IsFiniteCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, co
 }
 
 void IsFiniteCPUKernel::LaunchKernelFloat16(const std::vector<AddressPtr> &inputs,
-                                            const std::vector<kernel::AddressPtr> &outputs) {
+                                            const std::vector<kernel::AddressPtr> &outputs) const {
   float16 *input = reinterpret_cast<float16 *>(inputs[0]->addr);
   bool *output = reinterpret_cast<bool *>(outputs[0]->addr);



+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/isfinite_cpu_kernel.h

@@ -39,7 +39,7 @@ class IsFiniteCPUKernel : public CPUKernel {
 
   void LaunchKernelOther(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
 
-  void LaunchKernelFloat16(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
+  void LaunchKernelFloat16(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) const;
 
  private:
  std::map<TypeId, size_t> dtype_map_ = {{kNumberTypeBool, sizeof(bool)}, {kNumberTypeInt8, sizeof(int8_t)},


+6 -6   mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc

@@ -34,16 +34,16 @@ void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   if (begin_params_axis < 0) {
     begin_params_axis += x_shape.size();
   }
-  for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) {
+  for (size_t i = 0; i < LongToSize(begin_norm_axis); i++) {
     block_num_ *= x_shape[i];
   }
-  for (size_t i = IntToSize(begin_norm_axis); i < x_shape.size(); i++) {
+  for (size_t i = LongToSize(begin_norm_axis); i < x_shape.size(); i++) {
     block_size_ *= x_shape[i];
   }
-  for (size_t i = IntToSize(begin_params_axis); i < x_shape.size(); i++) {
+  for (size_t i = LongToSize(begin_params_axis); i < x_shape.size(); i++) {
     param_num_ *= x_shape[i];
   }
-  if (block_num_ <= 0 || block_size_ <= 0) {
+  if (block_num_ == 0 || block_size_ == 0) {
     MS_LOG(EXCEPTION) << "LayerNormCPUKernel input shape error, input shape: " << x_shape;
   }
 }
@@ -93,8 +93,8 @@ void LayerNormCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con
       sum += x[j];
       square_sum += x[j] * x[j];
     }
-    T block_mean = sum / block_size_;
-    T block_var = square_sum / block_size_ - block_mean * block_mean;
+    T block_mean = sum / static_cast<T>(block_size_);
+    T block_var = square_sum / static_cast<T>(block_size_) - block_mean * block_mean;
     for (size_t j = i * block_size_; j < (i + 1) * block_size_; ++j) {
       auto param_shift = j % param_num_;
       y[j] = (x[j] - block_mean) / (T)std::sqrt(static_cast<double>(block_var) + eps_) * gamma[param_shift] +


+9 -10  mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc

@@ -33,30 +33,30 @@ void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   if (begin_params_axis < 0) {
     begin_params_axis += x_shape.size();
   }
-  for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) {
+  for (size_t i = 0; i < LongToSize(begin_norm_axis); i++) {
     block_num_ *= x_shape[i];
   }
-  for (size_t i = IntToSize(begin_norm_axis); i < x_shape.size(); i++) {
+  for (size_t i = LongToSize(begin_norm_axis); i < x_shape.size(); i++) {
     block_size_ *= x_shape[i];
   }
-  for (size_t i = 0; i < IntToSize(begin_params_axis); i++) {
+  for (size_t i = 0; i < LongToSize(begin_params_axis); i++) {
     param_size_ *= x_shape[i];
   }
-  for (size_t i = begin_params_axis; i < x_shape.size(); i++) {
+  for (size_t i = LongToSize(begin_params_axis); i < x_shape.size(); i++) {
     param_num_ *= x_shape[i];
   }
-  if (block_num_ <= 0 || block_size_ <= 0) {
+  if (block_num_ == 0 || block_size_ == 0) {
     MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel input shape error, input shape: " << x_shape;
   }
 }
 
 bool LayerNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                    const std::vector<kernel::AddressPtr> &workspace,
+                                    const std::vector<kernel::AddressPtr> &,
                                     const std::vector<kernel::AddressPtr> &outputs) {
   if (dtype_ == kNumberTypeFloat16) {
-    LaunchKernel<float16>(inputs, workspace, outputs);
+    LaunchKernel<float16>(inputs, outputs);
   } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) {
-    LaunchKernel<float>(inputs, workspace, outputs);
+    LaunchKernel<float>(inputs, outputs);
   } else {
     MS_LOG(EXCEPTION) << "input dtype only support float16, float32, float64";
   }
@@ -65,7 +65,6 @@ bool LayerNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input
 
 template <typename T>
 void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
-                                          const std::vector<AddressPtr> &workspace,
                                           const std::vector<AddressPtr> &outputs) {
   auto x = reinterpret_cast<T *>(inputs[0]->addr);
   auto dy = reinterpret_cast<T *>(inputs[1]->addr);
@@ -123,7 +122,7 @@ void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
       auto var_sqrt = (T)std::pow(static_cast<double>(var[norm_shift]) + eps_, -0.5);
       auto dx1 = dy[j] * gamma[param_shift] * var_sqrt;
       auto dx2 = sum1 * (T)2.0 / block_size_ * (x[j] - mean[norm_shift]);
-      auto dx3 = ((T)(-1.0) * var_sqrt * sum2 + ((T)1.0 / block_size_) * sum1 * sum3) * ((T)1.0 / block_size_);
+      auto dx3 = ((T)(-1.0) * var_sqrt * sum2 + ((T)1.0 / (T)block_size_) * sum1 * sum3) * ((T)1.0 / (T)block_size_);
       dx[j] = dx1 + dx2 + dx3;
     }
   }


+1 -2   mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h

@@ -35,8 +35,7 @@ class LayerNormGradCPUKernel : public CPUKernel {
               const std::vector<AddressPtr> &outputs) override;
 
   template <typename T>
-  void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
-                    const std::vector<AddressPtr> &outputs);
+  void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
 
  private:
  void CheckParam(const CNodePtr &kernel_node);


+2 -2   mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_cpu_kernel.cc

@@ -133,13 +133,13 @@ void MaximumCPUKernel<T>::InitTensorBroadcastShape() {
   }
   int input_x_dim_offset = output_shape_.size() - input_x_shape_.size();
   for (size_t j = 0; j < input_x_shape_.size(); j++) {
-    broadcast_input_x_shape_[j + input_x_dim_offset] = input_x_shape_[j];
+    broadcast_input_x_shape_[j + IntToSize(input_x_dim_offset)] = input_x_shape_[j];
     input_x_num_ *= input_x_shape_[j];
   }
   int input_y_dim_offset = output_shape_.size() - input_y_shape_.size();
   for (size_t k = 0; k < input_y_shape_.size(); k++) {
     if (need_broadcast_) {
-      broadcast_input_y_shape_[k + input_y_dim_offset] = input_y_shape_[k];
+      broadcast_input_y_shape_[k + IntToSize(input_y_dim_offset)] = input_y_shape_[k];
       input_y_num_ *= input_y_shape_[k];
     }
   }


+5 -3   mindspore/ccsrc/backend/kernel_compiler/cpu/maximum_grad_cpu_kernel.cc

@@ -115,9 +115,11 @@ void MaximumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, c
   size_t y_tensor_len = GetTensorLen(y_shape_);
   size_t x_tensor_size = x_tensor_len * sizeof(T);
   size_t y_tensor_size = y_tensor_len * sizeof(T);
-  memset_s(dx_addr, x_tensor_size, 0, x_tensor_size);
-  memset_s(dy_addr, y_tensor_size, 0, y_tensor_size);
-
+  auto res_dx = memset_s(dx_addr, x_tensor_size, 0, x_tensor_size);
+  auto res_dy = memset_s(dy_addr, y_tensor_size, 0, y_tensor_size);
+  if (res_dx != EOK || res_dy != EOK) {
+    MS_LOG(EXCEPTION) << "MaximumGradCPUKernel LaunchKernel task memset failed.";
+  }
   std::vector<size_t> x_shape(dout_shape.size(), 1);
   std::vector<size_t> y_shape(dout_shape.size(), 1);
   std::vector<size_t> x_cargo(dout_shape.size(), 0);
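The fix above stops discarding the return values of `memset_s` and raises on failure. A compilable sketch of the pattern; `memset_s_sketch` is a stand-in for the securec library's `memset_s` (which returns `EOK` on success), so the snippet builds without that dependency:

```cpp
#include <cstddef>
#include <cstring>
#include <stdexcept>

constexpr int kEokSketch = 0;  // stand-in for securec's EOK

// Securec-style contract: fail if the destination is null or too small.
int memset_s_sketch(void *dest, std::size_t dest_max, int ch, std::size_t count) {
  if (dest == nullptr || count > dest_max) return -1;
  std::memset(dest, ch, count);
  return kEokSketch;
}

void ZeroGradients(float *dx, float *dy, std::size_t x_bytes, std::size_t y_bytes) {
  auto res_dx = memset_s_sketch(dx, x_bytes, 0, x_bytes);
  auto res_dy = memset_s_sketch(dy, y_bytes, 0, y_bytes);
  if (res_dx != kEokSketch || res_dy != kEokSketch) {
    throw std::runtime_error("memset failed");  // the kernel uses MS_LOG(EXCEPTION)
  }
}
```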


+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batch_norm_gard_cpu_kernel.cc

@@ -84,7 +84,7 @@ bool BatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input
   auto wksp_in = reinterpret_cast<float *>(workspace[0]->addr);
   auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size);
   auto max_size = workspace[0]->size - inputs[2]->size;
-  auto bias_ret = memset_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, 0., max_size);
+  auto bias_ret = memset_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, 0, max_size);
   if (scale_ret != 0 && bias_ret != 0) {
     MS_LOG(EXCEPTION) << "Memcpy_s error.";
     return false;


+2 -2   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/batchmatmul_cpu_kernel.cc

@@ -21,7 +21,7 @@
 
 namespace mindspore {
 namespace kernel {
-bool BatchMatMulCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool BatchMatMulCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                   const std::vector<AddressPtr> &outputs) {
   if (inputs.size() < 2 || outputs.empty()) {
     MS_LOG(EXCEPTION) << "batchmatmul error input output size!";
@@ -83,7 +83,7 @@ void BatchMatMulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   }
 
   auto input1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  dim_k_ = trans_a ? input1_shape[dims - 2] : input1_shape[dims - 1];
+  dim_k_ = static_cast<dnnl_dim_t>(trans_a ? input1_shape[dims - 2] : input1_shape[dims - 1]);
 
   trans_a_ = trans_a ? TRANSPOSE_YES : TRANSPOSE_NO;
   trans_b_ = trans_b ? TRANSPOSE_YES : TRANSPOSE_NO;


+3 -2   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc

@@ -58,8 +58,9 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   if (stride_me.size() < h_index + 2) {
     MS_LOG(EXCEPTION) << "Strides should greater than " << (h_index + 1) << ", but got " << stride_me.size();
   }
-  (void)std::transform(stride_me.begin() + h_index, stride_me.begin() + h_index + 2, std::back_inserter(stride_ori),
-                       [](const int64_t &value) { return static_cast<int>(value); });
+  auto h_index_int64 = SizeToLong(h_index);
+  (void)std::transform(stride_me.begin() + h_index_int64, stride_me.begin() + h_index_int64 + 2,
+                       std::back_inserter(stride_ori), [](const int64_t &value) { return static_cast<int>(value); });
   (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
                        [](const int64_t &value) { return static_cast<int>(value); });
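Advancing `stride_me.begin()` by an unsigned `size_t` mixes signedness in iterator arithmetic, so the fix converts the offset to a signed value first (`SizeToLong` is MindSpore's checked helper). A self-contained sketch with a plain `static_cast` standing in for it:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <vector>

int main() {
  std::vector<int64_t> stride_me = {1, 1, 2, 2};
  std::vector<int> stride_ori;
  size_t h_index = 2;
  // Convert the unsigned index to a signed offset before iterator arithmetic.
  auto h_offset = static_cast<int64_t>(h_index);
  (void)std::transform(stride_me.begin() + h_offset, stride_me.begin() + h_offset + 2,
                       std::back_inserter(stride_ori),
                       [](const int64_t &value) { return static_cast<int>(value); });
  return stride_ori.size() == 2 ? 0 : 1;
}
```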



+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc

@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace kernel {
 dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node,
-                                                                    dnnl::memory::desc src_desc) {
+                                                                    const dnnl::memory::desc src_desc) {
   std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
   if (kernel_name == "ReLU") {
     return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0);


+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h

@@ -32,7 +32,7 @@ class EltWiseCPUKernel : public MKLCPUKernel {
               const std::vector<AddressPtr> &outputs) override;
 
  private:
-  dnnl::eltwise_forward::desc GetForwardEltwiseDesc(const CNodePtr &kernel_node, dnnl::memory::desc src_desc);
+  dnnl::eltwise_forward::desc GetForwardEltwiseDesc(const CNodePtr &kernel_node, const dnnl::memory::desc src_desc);
   dnnl::prop_kind DnnlForward = dnnl::prop_kind::forward_training;
 };



+2 -3   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc

@@ -159,14 +159,13 @@ void LSTMGradCPUKernel::SetArgumentHandleOp(const std::vector<kernel::AddressPtr
   SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER_C, inputs[9]->addr);
 }
-void LSTMGradCPUKernel::ResetMemory(const dnnl::memory &mem, string name) {
+void LSTMGradCPUKernel::ResetMemory(const dnnl::memory &mem, const string name) const {
   if (memset_s(mem.get_data_handle(), mem.get_desc().get_size(), 0, mem.get_desc().get_size())) {
     MS_LOG(EXCEPTION) << name << " memset error";
   }
 }
-bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                               const std::vector<kernel::AddressPtr> &workspace,
+bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
                                const std::vector<kernel::AddressPtr> &outputs) {
   using dt = dnnl::memory::data_type;
   using tag = dnnl::memory::format_tag;


+1 -1   mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h

@@ -45,7 +45,7 @@ class LSTMGradCPUKernel : public MKLCPUKernel {
                  const dnnl::memory &weights_h_memory, const dnnl::memory &bias_memory,
                  const dnnl::memory &diff_weights_memory, const dnnl::memory &diff_weights_h_memory,
                  const dnnl::memory &diff_bias_memory);
-  void ResetMemory(const dnnl::memory &mem, string name);
+  void ResetMemory(const dnnl::memory &mem, const string name) const;
   void CheckParam(const CNodePtr &kernel_node);
   int64_t weight_size_ = 0;
   int64_t weight_h_size_ = 0;


+7 -5   mindspore/ccsrc/backend/kernel_compiler/cpu/slice_grad_cpu_kernel.cc

@@ -107,10 +107,12 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp
   }
   bool can_copy_memory[3] = {CanCopyMemoryOnAxis(0), CanCopyMemoryOnAxis(1), CanCopyMemoryOnAxis(2)};
   int stride_signs[4] = {SignOfStride(0), SignOfStride(1), SignOfStride(2), SignOfStride(3)};
-  size_t out_start_offset[3] = {begin_[0] * output_element_num_[0], begin_[1] * output_element_num_[1],
-                                begin_[2] * output_element_num_[2]};
-  size_t out_step_size[3] = {strides_[0] * output_element_num_[0], strides_[1] * output_element_num_[1],
-                             strides_[2] * output_element_num_[2]};
+  size_t out_start_offset[3] = {IntToSize(begin_[0]) * output_element_num_[0],
+                                IntToSize(begin_[1]) * output_element_num_[1],
+                                IntToSize(begin_[2]) * output_element_num_[2]};
+  size_t out_step_size[3] = {IntToSize(strides_[0]) * output_element_num_[0],
+                             IntToSize(strides_[1]) * output_element_num_[1],
+                             IntToSize(strides_[2]) * output_element_num_[2]};
   auto in_n_offset = 0;
   auto out_n_offset = out_start_offset[0];
   for (int i = begin_[0]; stride_signs[0] * i < stride_signs[0] * end_[0];
@@ -138,7 +140,7 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp
         continue;
       }
       for (int m = begin_[3]; stride_signs[3] * m < stride_signs[3] * end_[3]; m += strides_[3]) {
-        output_addr[out_n_offset + out_c_offset + out_h_offset + m] = *input_addr++;
+        output_addr[out_n_offset + out_c_offset + out_h_offset + IntToSize(m)] = *input_addr++;
       }
     }
   }


+4 -4   mindspore/ccsrc/backend/kernel_compiler/cpu/topk_cpu_kernel.cc

@@ -42,7 +42,7 @@ void TopKCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st
   if (k < 1) {
     MS_LOG(EXCEPTION) << "Input k must > 0!";
   }
-  int k_num = std::min<int>(inner_size_, k);
+  size_t k_num = IntToSize(std::min<int>(inner_size_, k));
   if (outputs[0]->size != outer_size_ * k_num * sizeof(T)) {
     MS_LOG(EXCEPTION) << "Error output data size!";
   }
@@ -54,10 +54,10 @@ void TopKCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st
                      [&input](size_t index_1, size_t index_2) { return input[index_1] > input[index_2]; });
     auto base_output = i * k_num;
     if (!sorted_) {
-      std::stable_sort(idx.begin(), idx.begin() + k_num);
+      std::stable_sort(idx.begin(), idx.begin() + SizeToLong(k_num));
     }
-    for (int j = 0; j < k_num; ++j) {
-      indices[base_output + j] = idx[j] - base_input;
+    for (size_t j = 0; j < k_num; ++j) {
+      indices[base_output + j] = SizeToInt(idx[j]) - SizeToInt(base_input);
       output[base_output + j] = input[idx[j]];
     }
   }
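Both hunks in this file line the index types up: `k_num` becomes `size_t`, the loop counter follows it, and the remaining signed/unsigned conversions are spelled out. A sketch of the idea (plain `static_cast`s stand in for MindSpore's `IntToSize`/`SizeToInt`/`SizeToLong` helpers; assumes `k_num <= inner_size`):

```cpp
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

void TopKIndices(const float *input, size_t inner_size, size_t k_num, int *indices) {
  std::vector<size_t> idx(inner_size);
  std::iota(idx.begin(), idx.end(), static_cast<size_t>(0));
  // Partially sort so the k_num largest elements come first; the iterator
  // offset is converted to the signed difference type explicitly.
  std::nth_element(idx.begin(), idx.begin() + static_cast<std::ptrdiff_t>(k_num), idx.end(),
                   [&input](size_t a, size_t b) { return input[a] > input[b]; });
  for (size_t j = 0; j < k_num; ++j) {     // size_t counter matches k_num's type
    indices[j] = static_cast<int>(idx[j]);  // explicit narrowing, as in the diff
  }
}
```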

