Browse Source

!31540 clean code for cpu

Merge pull request !31540 from liangxhao/cleancode_master
r1.7
i-robot Gitee 4 years ago
parent
commit
ffb5165bc6
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
17 changed files with 105 additions and 76 deletions
  1. +50
    -40
      mindspore/ccsrc/plugin/device/cpu/kernel/adam_cpu_kernel.cc
  2. +0
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/adam_cpu_kernel.h
  3. +1
    -3
      mindspore/ccsrc/plugin/device/cpu/kernel/argmax_with_value_cpu_kernel.cc
  4. +2
    -2
      mindspore/ccsrc/plugin/device/cpu/kernel/boundingbox_encode_cpu_kernel.cc
  5. +2
    -2
      mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_grad_cpu_kernel.cc
  6. +9
    -6
      mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.cc
  7. +0
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.h
  8. +16
    -7
      mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.cc
  9. +0
    -2
      mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.h
  10. +1
    -2
      mindspore/ccsrc/plugin/device/cpu/kernel/random_choice_with_mask_cpu_kernel.cc
  11. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/random_choice_with_mask_cpu_kernel.h
  12. +3
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_cpu_kernel.cc
  13. +4
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_grad_cpu_kernel.cc
  14. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_cpu_kernel.cc
  15. +3
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_grad_cpu_kernel.cc
  16. +12
    -6
      mindspore/ccsrc/plugin/device/cpu/kernel/sgd_cpu_kernel.cc
  17. +0
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/sgd_cpu_kernel.h

+ 50
- 40
mindspore/ccsrc/plugin/device/cpu/kernel/adam_cpu_kernel.cc View File

@@ -26,29 +26,39 @@ namespace {
// Expected number of input / output buffers, checked when the kernel launches.
constexpr size_t kAdamInputsNum{10};
constexpr size_t kAdamOutputsNum{3};
// Element offset used to read a scalar hyper-parameter out of its buffer.
constexpr size_t kScalarIndex{0};
// Positions of the individual operands inside the `inputs` vector.
constexpr size_t kIndexVar{0};
constexpr size_t kIndexM{1};
constexpr size_t kIndexV{2};
constexpr size_t kIndexBeta1Power{3};
constexpr size_t kIndexBeta2Power{4};
constexpr size_t kIndexLr{5};
constexpr size_t kIndexBeta1{6};
constexpr size_t kIndexBeta2{7};
constexpr size_t kIndexEpsilon{8};
constexpr size_t kIndexGrad{9};
// NOTE(review): presumably the per-task block size for the parallel launch; its use is not visible here.
constexpr float kAdamBlock{1000.0F};
} // namespace

template <typename T>
void AdamCpuKernelMod::LaunchAdam(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &) {
T *var = reinterpret_cast<T *>(inputs[VAR]->addr);
T *m = reinterpret_cast<T *>(inputs[M]->addr);
T *v = reinterpret_cast<T *>(inputs[V]->addr);
float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex];
float beta2_power = reinterpret_cast<float *>(inputs[BETA2_POWER]->addr)[kScalarIndex];
float lr = reinterpret_cast<float *>(inputs[LR]->addr)[kScalarIndex];
T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]);
T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]);
T epsilon = static_cast<T>(reinterpret_cast<float *>(inputs[EPSILON]->addr)[kScalarIndex]);
T *gradient = reinterpret_cast<T *>(inputs[GRAD]->addr);
T *var = reinterpret_cast<T *>(inputs[kIndexVar]->addr);
T *m = reinterpret_cast<T *>(inputs[kIndexM]->addr);
T *v = reinterpret_cast<T *>(inputs[kIndexV]->addr);
float beta1_power = reinterpret_cast<float *>(inputs[kIndexBeta1Power]->addr)[kScalarIndex];
float beta2_power = reinterpret_cast<float *>(inputs[kIndexBeta2Power]->addr)[kScalarIndex];
float lr = reinterpret_cast<float *>(inputs[kIndexLr]->addr)[kScalarIndex];
T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta1]->addr)[kScalarIndex]);
T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta2]->addr)[kScalarIndex]);
T epsilon = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexEpsilon]->addr)[kScalarIndex]);
T *gradient = reinterpret_cast<T *>(inputs[kIndexGrad]->addr);
constexpr float ONE = 1.0;
if (beta1_power - ONE == 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'beta1_power' can't be set 1.";
}
T new_lr = static_cast<T>(lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power));
// multithreading
size_t lens = inputs[VAR]->size > 0 ? static_cast<size_t>(inputs[VAR]->size / sizeof(T)) : 1;
size_t lens = inputs[kIndexVar]->size > 0 ? static_cast<size_t>(inputs[kIndexVar]->size / sizeof(T)) : 1;
auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t end) {
T one = static_cast<T>(1.0);
for (size_t i = start; i < end; i++) {
@@ -67,16 +77,16 @@ void AdamCpuKernelMod::LaunchAdam(const std::vector<kernel::AddressPtr> &inputs,

void AdamCpuKernelMod::LaunchAdamNnacl(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &) {
float *var = reinterpret_cast<float *>(inputs[VAR]->addr);
float *m = reinterpret_cast<float *>(inputs[M]->addr);
float *v = reinterpret_cast<float *>(inputs[V]->addr);
float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex];
float beta2_power = reinterpret_cast<float *>(inputs[BETA2_POWER]->addr)[kScalarIndex];
float lr = reinterpret_cast<float *>(inputs[LR]->addr)[kScalarIndex];
float beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex];
float beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex];
float epsilon = reinterpret_cast<float *>(inputs[EPSILON]->addr)[kScalarIndex];
float *gradient = reinterpret_cast<float *>(inputs[GRAD]->addr);
float *var = reinterpret_cast<float *>(inputs[kIndexVar]->addr);
float *m = reinterpret_cast<float *>(inputs[kIndexM]->addr);
float *v = reinterpret_cast<float *>(inputs[kIndexV]->addr);
float beta1_power = reinterpret_cast<float *>(inputs[kIndexBeta1Power]->addr)[kScalarIndex];
float beta2_power = reinterpret_cast<float *>(inputs[kIndexBeta2Power]->addr)[kScalarIndex];
float lr = reinterpret_cast<float *>(inputs[kIndexLr]->addr)[kScalarIndex];
float beta1 = reinterpret_cast<float *>(inputs[kIndexBeta1]->addr)[kScalarIndex];
float beta2 = reinterpret_cast<float *>(inputs[kIndexBeta2]->addr)[kScalarIndex];
float epsilon = reinterpret_cast<float *>(inputs[kIndexEpsilon]->addr)[kScalarIndex];
float *gradient = reinterpret_cast<float *>(inputs[kIndexGrad]->addr);
constexpr float ONE = 1.0;
if (beta1_power - ONE == 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'beta1_power' can't be set 1.";
@@ -84,7 +94,7 @@ void AdamCpuKernelMod::LaunchAdamNnacl(const std::vector<kernel::AddressPtr> &in
float new_lr = lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power);

// multithreading
size_t lens = inputs[VAR]->size > 0 ? static_cast<size_t>(inputs[VAR]->size / sizeof(float)) : 1;
size_t lens = inputs[kIndexVar]->size > 0 ? static_cast<size_t>(inputs[kIndexVar]->size / sizeof(float)) : 1;
auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t end) {
int ret = AdamFp32(var, m, v, new_lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_);
if (ret != NNACL_OK) {
@@ -110,45 +120,45 @@ bool AdamCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, con
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kAdamInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAdamOutputsNum, kernel_name_);

if (inputs[VAR]->size != inputs[M]->size) {
if (inputs[kIndexVar]->size != inputs[kIndexM]->size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the shape and dtype of 'm' and 'var' should be same, but got the memory size of 'm': "
<< inputs[M]->size << " and 'var': " << inputs[VAR]->size;
<< inputs[kIndexM]->size << " and 'var': " << inputs[kIndexVar]->size;
}
if (inputs[VAR]->size != inputs[V]->size) {
if (inputs[kIndexVar]->size != inputs[kIndexV]->size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the shape and dtype of 'v' and 'var' should be same, but got the memory size of 'v': "
<< inputs[V]->size << " and 'var': " << inputs[VAR]->size;
<< inputs[kIndexV]->size << " and 'var': " << inputs[kIndexVar]->size;
}
if (inputs[VAR]->size != inputs[GRAD]->size) {
if (inputs[kIndexVar]->size != inputs[kIndexGrad]->size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the shape and dtype of 'gradient' and 'var' should be same, but got "
"the memory size of 'gradient': "
<< inputs[GRAD]->size << " and 'var': " << inputs[VAR]->size;
<< inputs[kIndexGrad]->size << " and 'var': " << inputs[kIndexVar]->size;
}
size_t f_size = sizeof(float);
if (inputs[BETA1_POWER]->size != f_size) {
if (inputs[kIndexBeta1Power]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'beta1_power' should be float, but got 'beta1_power': " << inputs[BETA1_POWER];
<< "', the 'beta1_power' should be float, but got 'beta1_power': " << inputs[kIndexBeta1Power];
}
if (inputs[BETA2_POWER]->size != f_size) {
if (inputs[kIndexBeta2Power]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'beta2_power' should be float, but got 'beta2_power': " << inputs[BETA2_POWER];
<< "', the 'beta2_power' should be float, but got 'beta2_power': " << inputs[kIndexBeta2Power];
}
if (inputs[LR]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'lr' should be float, but got 'lr': " << inputs[LR];
if (inputs[kIndexLr]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'lr' should be float, but got 'lr': " << inputs[kIndexLr];
}
if (inputs[BETA1]->size != f_size) {
if (inputs[kIndexBeta1]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'beta1' should be float, but got 'beta1': " << inputs[BETA1];
<< "', the 'beta1' should be float, but got 'beta1': " << inputs[kIndexBeta1];
}
if (inputs[BETA2]->size != f_size) {
if (inputs[kIndexBeta2]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'beta2' should be float, but got 'beta2': " << inputs[BETA2];
<< "', the 'beta2' should be float, but got 'beta2': " << inputs[kIndexBeta2];
}
if (inputs[EPSILON]->size != f_size) {
if (inputs[kIndexEpsilon]->size != f_size) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'epsilon' should be float, but got 'epsilon': " << inputs[EPSILON];
<< "', the 'epsilon' should be float, but got 'epsilon': " << inputs[kIndexEpsilon];
}

if (dtype_ == kNumberTypeFloat32) {


+ 0
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/adam_cpu_kernel.h View File

@@ -40,7 +40,6 @@ class AdamCpuKernelMod : public NativeCpuKernelMod {
void LaunchAdamNnacl(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
bool use_nesterov_{false};
TypeId dtype_{kTypeUnknown};
enum input_list_ { VAR, M, V, BETA1_POWER, BETA2_POWER, LR, BETA1, BETA2, EPSILON, GRAD };
};
} // namespace kernel
} // namespace mindspore


+ 1
- 3
mindspore/ccsrc/plugin/device/cpu/kernel/argmax_with_value_cpu_kernel.cc View File

@@ -64,9 +64,7 @@ template <typename T>
bool ArgMaxWithValueCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (!check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) {
return false;
}
(void)check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs);

auto input = reinterpret_cast<T *>(inputs[0]->addr);
auto output0 = reinterpret_cast<int32_t *>(outputs[0]->addr);


+ 2
- 2
mindspore/ccsrc/plugin/device/cpu/kernel/boundingbox_encode_cpu_kernel.cc View File

@@ -58,7 +58,7 @@ void BoundingBoxEncodeCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
} else if (common::AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
float mean = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "means");
for (size_t i = 0; i < coordinate_size; i++) {
means_.emplace_back(mean);
(void)means_.emplace_back(mean);
}
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
@@ -71,7 +71,7 @@ void BoundingBoxEncodeCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
} else if (common::AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
float std = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "stds");
for (size_t i = 0; i < coordinate_size; i++) {
stds_.emplace_back(std);
(void)stds_.emplace_back(std);
}
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_


+ 2
- 2
mindspore/ccsrc/plugin/device/cpu/kernel/layer_norm_grad_cpu_kernel.cc View File

@@ -142,7 +142,7 @@ void LayerNormGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu
task1(i);
return common::SUCCESS;
};
tasks1.emplace_back(block);
(void)tasks1.emplace_back(block);
}
ParallelLaunch(tasks1);
for (size_t i = 0; i < thread_num2; ++i) {
@@ -150,7 +150,7 @@ void LayerNormGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu
task2(i);
return common::SUCCESS;
};
tasks2.emplace_back(block);
(void)tasks2.emplace_back(block);
}
ParallelLaunch(tasks2);
}


+ 9
- 6
mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.cc View File

@@ -23,14 +23,17 @@ namespace kernel {
namespace {
// Number of input / output buffers the kernel expects at launch.
constexpr size_t kMaskedSelectGradInputsNum{3};
constexpr size_t kMaskedSelectGradOutputsNum{1};
// Operand positions: the original input, the boolean mask and the incoming gradient.
// kIndexInput is also used to address the single output (dx) buffer.
constexpr size_t kIndexInput{0};
constexpr size_t kIndexMask{1};
constexpr size_t kIndexGrad{2};
}  // namespace

void MaskedSelectGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, INPUT);
input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, MASK);
grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, GRAD);
input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexInput);
input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexMask);
grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexGrad);
output_shape_ = CPUKernelUtils::GetBroadcastShape(input_shape_a_, input_shape_b_);
for (const uint64_t &d : output_shape_) {
tensor_size_ *= d;
@@ -49,9 +52,9 @@ bool MaskedSelectGradCpuKernelMod::LaunchKernel(const std::vector<kernel::Addres
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaskedSelectGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaskedSelectGradOutputsNum, kernel_name_);
auto mask = reinterpret_cast<bool *>(inputs[MASK]->addr);
auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
auto dx = reinterpret_cast<T *>(outputs[INPUT]->addr);
auto mask = reinterpret_cast<bool *>(inputs[kIndexMask]->addr);
auto grad = reinterpret_cast<T *>(inputs[kIndexGrad]->addr);
auto dx = reinterpret_cast<T *>(outputs[kIndexInput]->addr);

auto ret = memset_s(dx, outputs[0]->size, 0, outputs[0]->size);
if (ret != EOK) {


+ 0
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/masked_select_grad_cpu_kernel.h View File

@@ -53,7 +53,6 @@ class MaskedSelectGradCpuKernelMod : public NativeCpuKernelMod {
std::vector<size_t> grad_shape_;
std::vector<size_t> output_shape_;
uint64_t tensor_size_ = 1;
enum input_list_ { INPUT, MASK, GRAD };
};
} // namespace kernel
} // namespace mindspore


+ 16
- 7
mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.cc View File

@@ -20,6 +20,15 @@

namespace mindspore {
namespace kernel {
namespace {
// Positions of the scratch buffers inside the `workspace` vector.
// Declared constexpr (not plain const) to match the other CPU kernels'
// index constants and to guarantee compile-time evaluation.
constexpr size_t kIndexDataBuff = 0;
constexpr size_t kIndexIndexBuff = 1;
constexpr size_t kIndexRowMask = 2;
// Positions of the results inside the `outputs` vector.
constexpr size_t kIndexOutput = 0;
constexpr size_t kIndexSelIdx = 1;
constexpr size_t kIndexSelBoxes = 2;
}  // namespace

uint32_t NmsRoundUpPower2(int v) {
constexpr uint32_t ONE = 1, TWO = 2, FOUR = 4, EIGHT = 8, SIXTEEN = 16;
v--;
@@ -54,7 +63,7 @@ void NMSWithMaskCpuKernelMod::NmsBitonicSortByKeyKernel(const int inner, const s

for (size_t i = 2; i <= ceil_power2; i <<= 1) {
for (size_t j = (i >> 1); j > 0; j >>= 1) {
auto task2 = [&](size_t start, size_t end) {
auto task2 = [i, j, &data_buff, &index_buff](size_t start, size_t end) {
for (size_t tid = start; tid < end; tid++) {
size_t tid_comp = tid ^ j;
if (tid_comp > tid) {
@@ -237,12 +246,12 @@ bool NMSWithMaskCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr>
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
auto input = reinterpret_cast<T *>(inputs[0]->addr);
auto data_buff = reinterpret_cast<T *>(workspace[DATA_BUFF]->addr);
auto index_buff = reinterpret_cast<int *>(workspace[INDEX_BUFF]->addr);
auto row_mask = reinterpret_cast<bool *>(workspace[ROW_MASK]->addr);
auto output = reinterpret_cast<T *>(outputs[OUTPUT]->addr);
auto sel_idx = reinterpret_cast<int *>(outputs[SEL_IDX]->addr);
auto sel_boxes = reinterpret_cast<bool *>(outputs[SEL_BOXES]->addr);
auto data_buff = reinterpret_cast<T *>(workspace[kIndexDataBuff]->addr);
auto index_buff = reinterpret_cast<int *>(workspace[kIndexIndexBuff]->addr);
auto row_mask = reinterpret_cast<bool *>(workspace[kIndexRowMask]->addr);
auto output = reinterpret_cast<T *>(outputs[kIndexOutput]->addr);
auto sel_idx = reinterpret_cast<int *>(outputs[kIndexSelIdx]->addr);
auto sel_boxes = reinterpret_cast<bool *>(outputs[kIndexSelBoxes]->addr);

NmsBitonicSortByKeyKernel<T>(num_input_, ceil_power_2, input, data_buff, index_buff, box_size_);
size_t total_val = IntToSize(num_input_ * num_input_);


+ 0
- 2
mindspore/ccsrc/plugin/device/cpu/kernel/nms_with_mask_cpu_kernel.h View File

@@ -80,8 +80,6 @@ class NMSWithMaskCpuKernelMod : public NativeCpuKernelMod {
float iou_value_{0.0};
size_t ceil_power_2{0};
static const int box_size_ = 5; // pre_defined box width
enum workspace_list_ { DATA_BUFF, INDEX_BUFF, ROW_MASK };
enum output_list_ { OUTPUT, SEL_IDX, SEL_BOXES };
};
} // namespace kernel
} // namespace mindspore


+ 1
- 2
mindspore/ccsrc/plugin/device/cpu/kernel/random_choice_with_mask_cpu_kernel.cc View File

@@ -180,12 +180,11 @@ bool RandomChoiceWithMaskCpuKernelMod::Launch(const std::vector<kernel::AddressP
}
}

int32_t copy_output_length = 0;
if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', output size exceed INT_MAX.";
}

copy_output_length = output_length * input_dim_size;
int32_t copy_output_length = output_length * input_dim_size;
(void)memset_s(output, IntToSize(copy_output_length), 0X00, IntToSize(copy_output_length));
ParseOutputCoordinate(dims, output_length, input_dim_size, input_total_count, tmp_output, output);



+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/random_choice_with_mask_cpu_kernel.h View File

@@ -37,9 +37,9 @@ class RandomChoiceWithMaskCpuKernelMod : public NativeCpuKernelMod {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;

protected:
std::vector<KernelAttr> GetOpSupport() override {
static std::vector<KernelAttr> support_list = {
KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool)};


+ 3
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_cpu_kernel.cc View File

@@ -97,6 +97,9 @@ bool ResizeBilinearCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inp
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dtype of input should be float16 or float32, but got "
<< TypeIdLabel(dtype_);
}
MS_EXCEPTION_IF_NULL(output_addr_T2);
MS_EXCEPTION_IF_NULL(float_input_addr);
MS_EXCEPTION_IF_NULL(float_output_addr);

size_t batch_size = shape_[0];
size_t channel = shape_[1];


+ 4
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/resize_bilinear_grad_cpu_kernel.cc View File

@@ -108,6 +108,10 @@ bool ResizeBilinearGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr>
<< TypeIdLabel(dtype_);
}

MS_EXCEPTION_IF_NULL(output_addr);
MS_EXCEPTION_IF_NULL(float_dloss_addr);
MS_EXCEPTION_IF_NULL(float_output_addr);

size_t batch_size = shape_[0];
size_t channel = shape_[1];
size_t in_height = shape_[2];


+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_cpu_kernel.cc View File

@@ -240,7 +240,7 @@ void ROIAlignCpuKernelFunc<T>::bin_box(int thread_idx, const T *roi_boxes, int r
const T *roi_box = roi_boxes + (*n) * roi_cols;
int roi_batch_ind = 0;
if (roi_cols == ROIS_COLS) {
roi_batch_ind = FloatToInt(rint(static_cast<float>(roi_box[0]) + eps));
roi_batch_ind = FloatToInt(rintf(static_cast<float>(roi_box[0]) + eps));
roi_box++;
}



+ 3
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/roi_align_grad_cpu_kernel.cc View File

@@ -96,6 +96,8 @@ void AtomicAdd(T *const address, const T val) {
AtomicAddTask<T, int64_t>(address, val);
break;
}
default:
MS_LOG(EXCEPTION) << "For 'ROIAlignGrad', the dtype " << typeid(T).name() << " is unsupported.";
}
}

@@ -299,7 +301,7 @@ void ROIAlignGradCpuKernelFunc<T>::bin_box(int thread_idx, const T *roi_boxes, i
const T *roi_box = roi_boxes + (*n) * roi_cols;
int roi_batch_ind = 0;
if (roi_cols == ROIS_COLS) {
roi_batch_ind = FloatToInt(rint(static_cast<float>(roi_box[0]) + eps));
roi_batch_ind = FloatToInt(rintf(static_cast<float>(roi_box[0]) + eps));
roi_box++;
}



+ 12
- 6
mindspore/ccsrc/plugin/device/cpu/kernel/sgd_cpu_kernel.cc View File

@@ -25,6 +25,12 @@ namespace kernel {
namespace {
// Number of input / output buffers the kernel expects at launch.
constexpr size_t kSGDInputsNum{6};
constexpr size_t kSGDOutputsNum{1};
// Operand positions inside the `inputs` vector.
// NOTE(review): "Parm" looks like a typo for "Param"; the name is kept because it is referenced outside this block.
constexpr size_t kIndexParm{0};
constexpr size_t kIndexGrad{1};
constexpr size_t kIndexLr{2};
constexpr size_t kIndexAccum{3};
constexpr size_t kIndexMomentum{4};
constexpr size_t kIndexStat{5};
}  // namespace
void SGDCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
@@ -45,12 +51,12 @@ template <typename T>
bool SGDCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSGDInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSGDOutputsNum, kernel_name_);
auto param = reinterpret_cast<T *>(inputs[PARAM]->addr);
auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
auto lr = reinterpret_cast<T *>(inputs[LR]->addr);
auto accum = reinterpret_cast<T *>(inputs[ACCUM]->addr);
auto momentum = reinterpret_cast<T *>(inputs[MOMENTUM]->addr);
auto stat = reinterpret_cast<T *>(inputs[STAT]->addr);
auto param = reinterpret_cast<T *>(inputs[kIndexParm]->addr);
auto grad = reinterpret_cast<T *>(inputs[kIndexGrad]->addr);
auto lr = reinterpret_cast<T *>(inputs[kIndexLr]->addr);
auto accum = reinterpret_cast<T *>(inputs[kIndexAccum]->addr);
auto momentum = reinterpret_cast<T *>(inputs[kIndexMomentum]->addr);
auto stat = reinterpret_cast<T *>(inputs[kIndexStat]->addr);
auto output_param = reinterpret_cast<T *>(outputs[0]->addr);
size_t elem_num = inputs[0]->size / sizeof(T);



+ 0
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/sgd_cpu_kernel.h View File

@@ -51,7 +51,6 @@ class SGDCpuKernelMod : public NativeCpuKernelMod {
float dampening_{0.0};
float weight_decay_{0.0};
bool nesterov_{true};
enum input_list_ { PARAM, GRAD, LR, ACCUM, MOMENTUM, STAT };
};
} // namespace kernel
} // namespace mindspore


Loading…
Cancel
Save