| @@ -26,29 +26,39 @@ namespace { | |||
| constexpr size_t kAdamInputsNum = 10; /* Adam CPU kernel constants. Required inputs.size(); enforced by CHECK_KERNEL_INPUTS_NUM in Launch. */ | | |
| constexpr size_t kAdamOutputsNum = 3; /* Required outputs.size(); enforced by CHECK_KERNEL_OUTPUTS_NUM in Launch. */ | | |
| constexpr size_t kScalarIndex = 0; /* Element read from each single-value float input (beta powers, lr, betas, epsilon). */ | | |
| constexpr size_t kIndexVar = 0; /* Positions in the inputs vector start here. var buffer: cast to T* in LaunchAdam, float* in LaunchAdamNnacl. */ | | |
| constexpr size_t kIndexM = 1; /* m buffer; Launch requires its byte size to equal that of var. */ | | |
| constexpr size_t kIndexV = 2; /* v buffer; Launch requires its byte size to equal that of var. */ | | |
| constexpr size_t kIndexBeta1Power = 3; /* beta1_power, read as one float; a value of exactly 1 is rejected via MS_LOG(EXCEPTION). */ | | |
| constexpr size_t kIndexBeta2Power = 4; /* beta2_power, read as one float; Launch requires size == sizeof(float). */ | | |
| constexpr size_t kIndexLr = 5; /* lr, read as one float; Launch requires size == sizeof(float). */ | | |
| constexpr size_t kIndexBeta1 = 6; /* beta1, read as one float; Launch requires size == sizeof(float). */ | | |
| constexpr size_t kIndexBeta2 = 7; /* beta2, read as one float; Launch requires size == sizeof(float). */ | | |
| constexpr size_t kIndexEpsilon = 8; /* epsilon, read as one float; Launch requires size == sizeof(float). */ | | |
| constexpr size_t kIndexGrad = 9; /* gradient buffer; Launch requires its byte size to equal that of var. */ | | |
| constexpr float kAdamBlock = 1000; /* Not referenced in the visible hunks; presumably a per-task element count for parallel launch -- TODO confirm. */ | | |
| } // namespace | |||
| template <typename T> | |||
| void AdamCpuKernelMod::LaunchAdam(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| T *var = reinterpret_cast<T *>(inputs[VAR]->addr); | |||
| T *m = reinterpret_cast<T *>(inputs[M]->addr); | |||
| T *v = reinterpret_cast<T *>(inputs[V]->addr); | |||
| float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex]; | |||
| float beta2_power = reinterpret_cast<float *>(inputs[BETA2_POWER]->addr)[kScalarIndex]; | |||
| float lr = reinterpret_cast<float *>(inputs[LR]->addr)[kScalarIndex]; | |||
| T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]); | |||
| T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]); | |||
| T epsilon = static_cast<T>(reinterpret_cast<float *>(inputs[EPSILON]->addr)[kScalarIndex]); | |||
| T *gradient = reinterpret_cast<T *>(inputs[GRAD]->addr); | |||
| T *var = reinterpret_cast<T *>(inputs[kIndexVar]->addr); | |||
| T *m = reinterpret_cast<T *>(inputs[kIndexM]->addr); | |||
| T *v = reinterpret_cast<T *>(inputs[kIndexV]->addr); | |||
| float beta1_power = reinterpret_cast<float *>(inputs[kIndexBeta1Power]->addr)[kScalarIndex]; | |||
| float beta2_power = reinterpret_cast<float *>(inputs[kIndexBeta2Power]->addr)[kScalarIndex]; | |||
| float lr = reinterpret_cast<float *>(inputs[kIndexLr]->addr)[kScalarIndex]; | |||
| T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta1]->addr)[kScalarIndex]); | |||
| T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta2]->addr)[kScalarIndex]); | |||
| T epsilon = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexEpsilon]->addr)[kScalarIndex]); | |||
| T *gradient = reinterpret_cast<T *>(inputs[kIndexGrad]->addr); | |||
| constexpr float ONE = 1.0; | |||
| if (beta1_power - ONE == 0) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'beta1_power' can't be set 1."; | |||
| } | |||
| T new_lr = static_cast<T>(lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power)); | |||
| // multithreading | |||
| size_t lens = inputs[VAR]->size > 0 ? static_cast<size_t>(inputs[VAR]->size / sizeof(T)) : 1; | |||
| size_t lens = inputs[kIndexVar]->size > 0 ? static_cast<size_t>(inputs[kIndexVar]->size / sizeof(T)) : 1; | |||
| auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t end) { | |||
| T one = static_cast<T>(1.0); | |||
| for (size_t i = start; i < end; i++) { | |||
| @@ -67,16 +77,16 @@ void AdamCpuKernelMod::LaunchAdam(const std::vector<kernel::AddressPtr> &inputs, | |||
| void AdamCpuKernelMod::LaunchAdamNnacl(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| float *var = reinterpret_cast<float *>(inputs[VAR]->addr); | |||
| float *m = reinterpret_cast<float *>(inputs[M]->addr); | |||
| float *v = reinterpret_cast<float *>(inputs[V]->addr); | |||
| float beta1_power = reinterpret_cast<float *>(inputs[BETA1_POWER]->addr)[kScalarIndex]; | |||
| float beta2_power = reinterpret_cast<float *>(inputs[BETA2_POWER]->addr)[kScalarIndex]; | |||
| float lr = reinterpret_cast<float *>(inputs[LR]->addr)[kScalarIndex]; | |||
| float beta1 = reinterpret_cast<float *>(inputs[BETA1]->addr)[kScalarIndex]; | |||
| float beta2 = reinterpret_cast<float *>(inputs[BETA2]->addr)[kScalarIndex]; | |||
| float epsilon = reinterpret_cast<float *>(inputs[EPSILON]->addr)[kScalarIndex]; | |||
| float *gradient = reinterpret_cast<float *>(inputs[GRAD]->addr); | |||
| float *var = reinterpret_cast<float *>(inputs[kIndexVar]->addr); | |||
| float *m = reinterpret_cast<float *>(inputs[kIndexM]->addr); | |||
| float *v = reinterpret_cast<float *>(inputs[kIndexV]->addr); | |||
| float beta1_power = reinterpret_cast<float *>(inputs[kIndexBeta1Power]->addr)[kScalarIndex]; | |||
| float beta2_power = reinterpret_cast<float *>(inputs[kIndexBeta2Power]->addr)[kScalarIndex]; | |||
| float lr = reinterpret_cast<float *>(inputs[kIndexLr]->addr)[kScalarIndex]; | |||
| float beta1 = reinterpret_cast<float *>(inputs[kIndexBeta1]->addr)[kScalarIndex]; | |||
| float beta2 = reinterpret_cast<float *>(inputs[kIndexBeta2]->addr)[kScalarIndex]; | |||
| float epsilon = reinterpret_cast<float *>(inputs[kIndexEpsilon]->addr)[kScalarIndex]; | |||
| float *gradient = reinterpret_cast<float *>(inputs[kIndexGrad]->addr); | |||
| constexpr float ONE = 1.0; | |||
| if (beta1_power - ONE == 0) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'beta1_power' can't be set 1."; | |||
| @@ -84,7 +94,7 @@ void AdamCpuKernelMod::LaunchAdamNnacl(const std::vector<kernel::AddressPtr> &in | |||
| float new_lr = lr * std::sqrt(ONE - beta2_power) / (ONE - beta1_power); | |||
| // multithreading | |||
| size_t lens = inputs[VAR]->size > 0 ? static_cast<size_t>(inputs[VAR]->size / sizeof(float)) : 1; | |||
| size_t lens = inputs[kIndexVar]->size > 0 ? static_cast<size_t>(inputs[kIndexVar]->size / sizeof(float)) : 1; | |||
| auto task = [this, &var, &m, &v, &gradient, new_lr, beta1, beta2, epsilon](size_t start, size_t end) { | |||
| int ret = AdamFp32(var, m, v, new_lr, beta1, beta2, epsilon, gradient, start, end, use_nesterov_); | |||
| if (ret != NNACL_OK) { | |||
| @@ -110,45 +120,45 @@ bool AdamCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, con | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kAdamInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAdamOutputsNum, kernel_name_); | |||
| if (inputs[VAR]->size != inputs[M]->size) { | |||
| if (inputs[kIndexVar]->size != inputs[kIndexM]->size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the shape and dtype of 'm' and 'var' should be same, but got the memory size of 'm': " | |||
| << inputs[M]->size << " and 'var': " << inputs[VAR]->size; | |||
| << inputs[kIndexM]->size << " and 'var': " << inputs[kIndexVar]->size; | |||
| } | |||
| if (inputs[VAR]->size != inputs[V]->size) { | |||
| if (inputs[kIndexVar]->size != inputs[kIndexV]->size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the shape and dtype of 'v' and 'var' should be same, but got the memory size of 'v': " | |||
| << inputs[V]->size << " and 'var': " << inputs[VAR]->size; | |||
| << inputs[kIndexV]->size << " and 'var': " << inputs[kIndexVar]->size; | |||
| } | |||
| if (inputs[VAR]->size != inputs[GRAD]->size) { | |||
| if (inputs[kIndexVar]->size != inputs[kIndexGrad]->size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the shape and dtype of 'gradient' and 'var' should be same, but got " | |||
| "the memory size of 'gradient': " | |||
| << inputs[GRAD]->size << " and 'var': " << inputs[VAR]->size; | |||
| << inputs[kIndexGrad]->size << " and 'var': " << inputs[kIndexVar]->size; | |||
| } | |||
| size_t f_size = sizeof(float); | |||
| if (inputs[BETA1_POWER]->size != f_size) { | |||
| if (inputs[kIndexBeta1Power]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the 'beta1_power' should be float, but got 'beta1_power': " << inputs[BETA1_POWER]; | |||
| << "', the 'beta1_power' should be float, but got 'beta1_power': " << inputs[kIndexBeta1Power]; | |||
| } | |||
| if (inputs[BETA2_POWER]->size != f_size) { | |||
| if (inputs[kIndexBeta2Power]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the 'beta2_power' should be float, but got 'beta2_power': " << inputs[BETA2_POWER]; | |||
| << "', the 'beta2_power' should be float, but got 'beta2_power': " << inputs[kIndexBeta2Power]; | |||
| } | |||
| if (inputs[LR]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'lr' should be float, but got 'lr': " << inputs[LR]; | |||
| if (inputs[kIndexLr]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'lr' should be float, but got 'lr': " << inputs[kIndexLr]; | |||
| } | |||
| if (inputs[BETA1]->size != f_size) { | |||
| if (inputs[kIndexBeta1]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the 'beta1' should be float, but got 'beta1': " << inputs[BETA1]; | |||
| << "', the 'beta1' should be float, but got 'beta1': " << inputs[kIndexBeta1]; | |||
| } | |||
| if (inputs[BETA2]->size != f_size) { | |||
| if (inputs[kIndexBeta2]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the 'beta2' should be float, but got 'beta2': " << inputs[BETA2]; | |||
| << "', the 'beta2' should be float, but got 'beta2': " << inputs[kIndexBeta2]; | |||
| } | |||
| if (inputs[EPSILON]->size != f_size) { | |||
| if (inputs[kIndexEpsilon]->size != f_size) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| << "', the 'epsilon' should be float, but got 'epsilon': " << inputs[EPSILON]; | |||
| << "', the 'epsilon' should be float, but got 'epsilon': " << inputs[kIndexEpsilon]; | |||
| } | |||
| if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -40,7 +40,6 @@ class AdamCpuKernelMod : public NativeCpuKernelMod { | |||
| void LaunchAdamNnacl(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| bool use_nesterov_{false}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| enum input_list_ { VAR, M, V, BETA1_POWER, BETA2_POWER, LR, BETA1, BETA2, EPSILON, GRAD }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -64,9 +64,7 @@ template <typename T> | |||
| bool ArgMaxWithValueCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (!check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) { | |||
| return false; | |||
| } | |||
| (void)check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs); | |||
| auto input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto output0 = reinterpret_cast<int32_t *>(outputs[0]->addr); | |||
| @@ -58,7 +58,7 @@ void BoundingBoxEncodeCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| } else if (common::AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) { | |||
| float mean = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "means"); | |||
| for (size_t i = 0; i < coordinate_size; i++) { | |||
| means_.emplace_back(mean); | |||
| (void)means_.emplace_back(mean); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| @@ -71,7 +71,7 @@ void BoundingBoxEncodeCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| } else if (common::AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) { | |||
| float std = common::AnfAlgo::GetNodeAttr<float>(kernel_node, "stds"); | |||
| for (size_t i = 0; i < coordinate_size; i++) { | |||
| stds_.emplace_back(std); | |||
| (void)stds_.emplace_back(std); | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ | |||
| @@ -142,7 +142,7 @@ void LayerNormGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu | |||
| task1(i); | |||
| return common::SUCCESS; | |||
| }; | |||
| tasks1.emplace_back(block); | |||
| (void)tasks1.emplace_back(block); | |||
| } | |||
| ParallelLaunch(tasks1); | |||
| for (size_t i = 0; i < thread_num2; ++i) { | |||
| @@ -150,7 +150,7 @@ void LayerNormGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu | |||
| task2(i); | |||
| return common::SUCCESS; | |||
| }; | |||
| tasks2.emplace_back(block); | |||
| (void)tasks2.emplace_back(block); | |||
| } | |||
| ParallelLaunch(tasks2); | |||
| } | |||
| @@ -23,14 +23,17 @@ namespace kernel { | |||
| namespace { /* File-local constants for the MaskedSelectGrad CPU kernel. */ | | |
| constexpr size_t kMaskedSelectGradInputsNum = 3; /* Required inputs.size(); checked at the top of LaunchKernel. */ | | |
| constexpr size_t kMaskedSelectGradOutputsNum = 1; /* Required outputs.size(); checked at the top of LaunchKernel. */ | | |
| constexpr size_t kIndexInput = 0; /* Input position passed to GetInputDeviceShape in InitKernel; LaunchKernel also uses it to index outputs for dx. */ | | |
| constexpr size_t kIndexMask = 1; /* Mask input; shape fetched in InitKernel, data read as bool* in LaunchKernel. */ | | |
| constexpr size_t kIndexGrad = 2; /* Grad input; shape fetched in InitKernel, data read as T* in LaunchKernel. */ | | |
| } // namespace | | |
| void MaskedSelectGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, INPUT); | |||
| input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, MASK); | |||
| grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, GRAD); | |||
| input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexInput); | |||
| input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexMask); | |||
| grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndexGrad); | |||
| output_shape_ = CPUKernelUtils::GetBroadcastShape(input_shape_a_, input_shape_b_); | |||
| for (const uint64_t &d : output_shape_) { | |||
| tensor_size_ *= d; | |||
| @@ -49,9 +52,9 @@ bool MaskedSelectGradCpuKernelMod::LaunchKernel(const std::vector<kernel::Addres | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaskedSelectGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaskedSelectGradOutputsNum, kernel_name_); | |||
| auto mask = reinterpret_cast<bool *>(inputs[MASK]->addr); | |||
| auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr); | |||
| auto dx = reinterpret_cast<T *>(outputs[INPUT]->addr); | |||
| auto mask = reinterpret_cast<bool *>(inputs[kIndexMask]->addr); | |||
| auto grad = reinterpret_cast<T *>(inputs[kIndexGrad]->addr); | |||
| auto dx = reinterpret_cast<T *>(outputs[kIndexInput]->addr); | |||
| auto ret = memset_s(dx, outputs[0]->size, 0, outputs[0]->size); | |||
| if (ret != EOK) { | |||
| @@ -53,7 +53,6 @@ class MaskedSelectGradCpuKernelMod : public NativeCpuKernelMod { | |||
| std::vector<size_t> grad_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| uint64_t tensor_size_ = 1; | |||
| enum input_list_ { INPUT, MASK, GRAD }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -20,6 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { /* File-local buffer indices used by NMSWithMaskCpuKernelMod::LaunchKernel. NOTE(review): these are plain const while comparable index constants in sibling kernels are constexpr -- consider aligning. */ | | |
| const size_t kIndexDataBuff = 0; /* workspace slot read as T* (data_buff). */ | | |
| const size_t kIndexIndexBuff = 1; /* workspace slot read as int* (index_buff). */ | | |
| const size_t kIndexRowMask = 2; /* workspace slot read as bool* (row_mask). */ | | |
| const size_t kIndexOutput = 0; /* outputs slot read as T* (output). */ | | |
| const size_t kIndexSelIdx = 1; /* outputs slot read as int* (sel_idx). */ | | |
| const size_t kIndexSelBoxes = 2; /* outputs slot read as bool* (sel_boxes). */ | | |
| } // namespace | | |
| uint32_t NmsRoundUpPower2(int v) { | |||
| constexpr uint32_t ONE = 1, TWO = 2, FOUR = 4, EIGHT = 8, SIXTEEN = 16; | |||
| v--; | |||
| @@ -54,7 +63,7 @@ void NMSWithMaskCpuKernelMod::NmsBitonicSortByKeyKernel(const int inner, const s | |||
| for (size_t i = 2; i <= ceil_power2; i <<= 1) { | |||
| for (size_t j = (i >> 1); j > 0; j >>= 1) { | |||
| auto task2 = [&](size_t start, size_t end) { | |||
| auto task2 = [i, j, &data_buff, &index_buff](size_t start, size_t end) { | |||
| for (size_t tid = start; tid < end; tid++) { | |||
| size_t tid_comp = tid ^ j; | |||
| if (tid_comp > tid) { | |||
| @@ -237,12 +246,12 @@ bool NMSWithMaskCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto data_buff = reinterpret_cast<T *>(workspace[DATA_BUFF]->addr); | |||
| auto index_buff = reinterpret_cast<int *>(workspace[INDEX_BUFF]->addr); | |||
| auto row_mask = reinterpret_cast<bool *>(workspace[ROW_MASK]->addr); | |||
| auto output = reinterpret_cast<T *>(outputs[OUTPUT]->addr); | |||
| auto sel_idx = reinterpret_cast<int *>(outputs[SEL_IDX]->addr); | |||
| auto sel_boxes = reinterpret_cast<bool *>(outputs[SEL_BOXES]->addr); | |||
| auto data_buff = reinterpret_cast<T *>(workspace[kIndexDataBuff]->addr); | |||
| auto index_buff = reinterpret_cast<int *>(workspace[kIndexIndexBuff]->addr); | |||
| auto row_mask = reinterpret_cast<bool *>(workspace[kIndexRowMask]->addr); | |||
| auto output = reinterpret_cast<T *>(outputs[kIndexOutput]->addr); | |||
| auto sel_idx = reinterpret_cast<int *>(outputs[kIndexSelIdx]->addr); | |||
| auto sel_boxes = reinterpret_cast<bool *>(outputs[kIndexSelBoxes]->addr); | |||
| NmsBitonicSortByKeyKernel<T>(num_input_, ceil_power_2, input, data_buff, index_buff, box_size_); | |||
| size_t total_val = IntToSize(num_input_ * num_input_); | |||
| @@ -80,8 +80,6 @@ class NMSWithMaskCpuKernelMod : public NativeCpuKernelMod { | |||
| float iou_value_{0.0}; | |||
| size_t ceil_power_2{0}; | |||
| static const int box_size_ = 5; // pre_defined box width | |||
| enum workspace_list_ { DATA_BUFF, INDEX_BUFF, ROW_MASK }; | |||
| enum output_list_ { OUTPUT, SEL_IDX, SEL_BOXES }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -180,12 +180,11 @@ bool RandomChoiceWithMaskCpuKernelMod::Launch(const std::vector<kernel::AddressP | |||
| } | |||
| } | |||
| int32_t copy_output_length = 0; | |||
| if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) { | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', output size exceed INT_MAX."; | |||
| } | |||
| copy_output_length = output_length * input_dim_size; | |||
| int32_t copy_output_length = output_length * input_dim_size; | |||
| (void)memset_s(output, IntToSize(copy_output_length), 0X00, IntToSize(copy_output_length)); | |||
| ParseOutputCoordinate(dims, output_length, input_dim_size, input_total_count, tmp_output, output); | |||
| @@ -37,9 +37,9 @@ class RandomChoiceWithMaskCpuKernelMod : public NativeCpuKernelMod { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| protected: | |||
| std::vector<KernelAttr> GetOpSupport() override { | |||
| static std::vector<KernelAttr> support_list = { | |||
| KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool)}; | |||
| @@ -97,6 +97,9 @@ bool ResizeBilinearCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inp | |||
| MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dtype of input should be float16 or float32, but got " | |||
| << TypeIdLabel(dtype_); | |||
| } | |||
| MS_EXCEPTION_IF_NULL(output_addr_T2); | |||
| MS_EXCEPTION_IF_NULL(float_input_addr); | |||
| MS_EXCEPTION_IF_NULL(float_output_addr); | |||
| size_t batch_size = shape_[0]; | |||
| size_t channel = shape_[1]; | |||
| @@ -108,6 +108,10 @@ bool ResizeBilinearGradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> | |||
| << TypeIdLabel(dtype_); | |||
| } | |||
| MS_EXCEPTION_IF_NULL(output_addr); | |||
| MS_EXCEPTION_IF_NULL(float_dloss_addr); | |||
| MS_EXCEPTION_IF_NULL(float_output_addr); | |||
| size_t batch_size = shape_[0]; | |||
| size_t channel = shape_[1]; | |||
| size_t in_height = shape_[2]; | |||
| @@ -240,7 +240,7 @@ void ROIAlignCpuKernelFunc<T>::bin_box(int thread_idx, const T *roi_boxes, int r | |||
| const T *roi_box = roi_boxes + (*n) * roi_cols; | |||
| int roi_batch_ind = 0; | |||
| if (roi_cols == ROIS_COLS) { | |||
| roi_batch_ind = FloatToInt(rint(static_cast<float>(roi_box[0]) + eps)); | |||
| roi_batch_ind = FloatToInt(rintf(static_cast<float>(roi_box[0]) + eps)); | |||
| roi_box++; | |||
| } | |||
| @@ -96,6 +96,8 @@ void AtomicAdd(T *const address, const T val) { | |||
| AtomicAddTask<T, int64_t>(address, val); | |||
| break; | |||
| } | |||
| default: | |||
| MS_LOG(EXCEPTION) << "For 'ROIAlignGrad', the dtype " << typeid(T).name() << " is unsupported."; | |||
| } | |||
| } | |||
| @@ -299,7 +301,7 @@ void ROIAlignGradCpuKernelFunc<T>::bin_box(int thread_idx, const T *roi_boxes, i | |||
| const T *roi_box = roi_boxes + (*n) * roi_cols; | |||
| int roi_batch_ind = 0; | |||
| if (roi_cols == ROIS_COLS) { | |||
| roi_batch_ind = FloatToInt(rint(static_cast<float>(roi_box[0]) + eps)); | |||
| roi_batch_ind = FloatToInt(rintf(static_cast<float>(roi_box[0]) + eps)); | |||
| roi_box++; | |||
| } | |||
| @@ -25,6 +25,12 @@ namespace kernel { | |||
| namespace { /* File-local constants for the SGD CPU kernel. */ | | |
| constexpr size_t kSGDInputsNum = 6; /* Required inputs.size(); checked at the top of LaunchKernel. */ | | |
| constexpr size_t kSGDOutputsNum = 1; /* Required outputs.size(); checked at the top of LaunchKernel. */ | | |
| constexpr size_t kIndexParm = 0; /* param input, read as T*. NOTE(review): "Parm" looks like a typo for "Param"; a rename must also update its use in LaunchKernel. */ | | |
| constexpr size_t kIndexGrad = 1; /* grad input, read as T*. */ | | |
| constexpr size_t kIndexLr = 2; /* lr input, read as T*. */ | | |
| constexpr size_t kIndexAccum = 3; /* accum input, read as T*. */ | | |
| constexpr size_t kIndexMomentum = 4; /* momentum input, read as T*. */ | | |
| constexpr size_t kIndexStat = 5; /* stat input, read as T*. */ | | |
| } // namespace | | |
| void SGDCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| @@ -45,12 +51,12 @@ template <typename T> | |||
| bool SGDCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSGDInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSGDOutputsNum, kernel_name_); | |||
| auto param = reinterpret_cast<T *>(inputs[PARAM]->addr); | |||
| auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr); | |||
| auto lr = reinterpret_cast<T *>(inputs[LR]->addr); | |||
| auto accum = reinterpret_cast<T *>(inputs[ACCUM]->addr); | |||
| auto momentum = reinterpret_cast<T *>(inputs[MOMENTUM]->addr); | |||
| auto stat = reinterpret_cast<T *>(inputs[STAT]->addr); | |||
| auto param = reinterpret_cast<T *>(inputs[kIndexParm]->addr); | |||
| auto grad = reinterpret_cast<T *>(inputs[kIndexGrad]->addr); | |||
| auto lr = reinterpret_cast<T *>(inputs[kIndexLr]->addr); | |||
| auto accum = reinterpret_cast<T *>(inputs[kIndexAccum]->addr); | |||
| auto momentum = reinterpret_cast<T *>(inputs[kIndexMomentum]->addr); | |||
| auto stat = reinterpret_cast<T *>(inputs[kIndexStat]->addr); | |||
| auto output_param = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(T); | |||
| @@ -51,7 +51,6 @@ class SGDCpuKernelMod : public NativeCpuKernelMod { | |||
| float dampening_{0.0}; | |||
| float weight_decay_{0.0}; | |||
| bool nesterov_{true}; | |||
| enum input_list_ { PARAM, GRAD, LR, ACCUM, MOMENTUM, STAT }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||