| @@ -144,6 +144,30 @@ std::vector<int64_t> CalDimOffset(const std::vector<int64_t> &input_shape); | |||
| size_t GetCopySize(const std::vector<int64_t> &dim_offset, const std::vector<int64_t> &start, | |||
| const std::vector<int64_t> &stop); | |||
| size_t UnitSizeInBytes(const mindspore::TypeId &t); | |||
| #define CHECK_KERNEL_INPUTS_NUM(actual_inputs_num, expect_inputs_num, kernel_name) \ | |||
| do { \ | |||
| if ((actual_inputs_num) != (expect_inputs_num)) { \ | |||
| MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_inputs_num) << " inputs, but got " \ | |||
| << (actual_inputs_num) << "."; \ | |||
| } \ | |||
| } while (0) | |||
| #define CHECK_KERNEL_OUTPUTS_NUM(actual_outputs_num, expect_outputs_num, kernel_name) \ | |||
| do { \ | |||
| if ((actual_outputs_num) != (expect_outputs_num)) { \ | |||
| MS_LOG(EXCEPTION) << (kernel_name) << " should have " << (expect_outputs_num) << " outputs, but got " \ | |||
| << (actual_outputs_num) << "."; \ | |||
| } \ | |||
| } while (0) | |||
| #define CHECK_KERNEL_WORKSPACE_SIZE(actual_size, expect_size, kernel_name) \ | |||
| do { \ | |||
| if ((actual_size) != (expect_size)) { \ | |||
| MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_size) << " workspace, but got " << (actual_size) \ | |||
| << "."; \ | |||
| } \ | |||
| } while (0) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -44,7 +44,6 @@ class FusedPullWeightKernel : public CPUKernel { | |||
| if (inputs.size() != weight_full_names_.size()) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but FusedPullWeightKernel needs " | |||
| << weight_full_names_.size() << " weights as inputs."; | |||
| return false; | |||
| } | |||
| std::shared_ptr<fl::FBBuilder> fbb = std::make_shared<fl::FBBuilder>(); | |||
| @@ -67,7 +66,6 @@ class FusedPullWeightKernel : public CPUKernel { | |||
| MS_LOG(INFO) << "Launching pulling weight for federated learning iteration " << fl_iteration_; | |||
| if (!BuildPullWeightReq(fbb)) { | |||
| MS_LOG(EXCEPTION) << "Building request for FusedPullWeight failed."; | |||
| return false; | |||
| } | |||
| std::shared_ptr<std::vector<unsigned char>> pull_weight_rsp_msg = nullptr; | |||
| @@ -98,13 +96,11 @@ class FusedPullWeightKernel : public CPUKernel { | |||
| fbb = std::make_shared<fl::FBBuilder>(); | |||
| if (!BuildPullWeightReq(fbb)) { | |||
| MS_LOG(EXCEPTION) << "Building request for FusedDownloadWeightsByKeys failed."; | |||
| return false; | |||
| } | |||
| continue; | |||
| } else if (retcode != schema::ResponseCode_SUCCEED) { | |||
| MS_LOG(EXCEPTION) << "FusedPullWeight failed. Server return code: " << pull_weight_rsp->retcode() | |||
| << ", reason: " << pull_weight_rsp->reason()->str(); | |||
| return false; | |||
| } else { | |||
| MS_LOG(DEBUG) << "FusedPullWeight succeed."; | |||
| } | |||
| @@ -115,13 +111,11 @@ class FusedPullWeightKernel : public CPUKernel { | |||
| const std::string &weight_name = weight_full_names_[i]; | |||
| if (feature_map.count(weight_name) == 0) { | |||
| MS_LOG(EXCEPTION) << "The weights for " << weight_name << " is not pulled from server."; | |||
| return false; | |||
| } | |||
| int ret = | |||
| memcpy_s(inputs[i]->addr, inputs[i]->size, feature_map[weight_name].addr, feature_map[weight_name].size); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")"; | |||
| return false; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Pull weights for " << weight_full_names_ << " success. Iteration: " << fl_iteration_; | |||
| @@ -147,7 +141,6 @@ class FusedPullWeightKernel : public CPUKernel { | |||
| MS_LOG(EXCEPTION) | |||
| << "Attributes of FusedPullWeightKernel are invalid: server number is 0 or weight_full_names_ is " | |||
| "empty or indices_ is UINT32_MAX."; | |||
| return; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| @@ -186,7 +179,6 @@ class FusedPullWeightKernel : public CPUKernel { | |||
| if (fbs_feature_map->size() != weight_full_names_.size()) { | |||
| MS_LOG(EXCEPTION) << "FusedPullWeightKernel should get " << weight_full_names_.size() << " weights, but got " | |||
| << fbs_feature_map->size() << " weights."; | |||
| return {}; | |||
| } | |||
| std::map<std::string, Address> feature_map; | |||
| @@ -42,7 +42,6 @@ class FusedPushWeightKernel : public CPUKernel { | |||
| if (inputs.size() != weight_full_names_.size()) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but FusedPushWeightKernel needs " | |||
| << weight_full_names_.size() << " weights as inputs."; | |||
| return false; | |||
| } | |||
| std::shared_ptr<fl::FBBuilder> fbb = std::make_shared<fl::FBBuilder>(); | |||
| @@ -65,7 +64,6 @@ class FusedPushWeightKernel : public CPUKernel { | |||
| MS_LOG(INFO) << "Launching pushing weight for federated learning iteration " << fl_iteration_; | |||
| if (!BuildPushWeightReq(fbb, inputs)) { | |||
| MS_LOG(EXCEPTION) << "Building request for FusedPushWeight failed."; | |||
| return false; | |||
| } | |||
| // The server number may change after scaling in/out. | |||
| @@ -97,13 +95,11 @@ class FusedPushWeightKernel : public CPUKernel { | |||
| << ". Retry later."; | |||
| if (!BuildPushWeightReq(fbb, inputs)) { | |||
| MS_LOG(EXCEPTION) << "Building request for FusedPushWeight failed."; | |||
| return false; | |||
| } | |||
| continue; | |||
| } else if (retcode != schema::ResponseCode_SUCCEED) { | |||
| MS_LOG(EXCEPTION) << "FusedPushWeight failed. Server return code: " << push_weight_rsp->retcode() | |||
| << ", reason: " << push_weight_rsp->reason()->str(); | |||
| return false; | |||
| } else { | |||
| MS_LOG(DEBUG) << "FusedPushWeight succeed."; | |||
| } | |||
| @@ -132,7 +128,6 @@ class FusedPushWeightKernel : public CPUKernel { | |||
| MS_LOG(EXCEPTION) | |||
| << "Attributes of FusedPushWeightKernel are invalid: server number is 0 or weight_full_names_ is " | |||
| "empty or indices_ is UINT32_MAX."; | |||
| return; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -24,17 +24,26 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kAddNInputsMinNum = 2; | |||
| constexpr size_t kAddNOutputsNum = 1; | |||
| void AddInt(const int *in_0, const int *in_1, int *out, int start, int end) { | |||
| int ret = ElementAddInt(in_0 + start, in_1 + start, out + start, end - start); | |||
| if (ret != NNACL_OK) { | |||
| MS_LOG(EXCEPTION) << "Add failed."; | |||
| } | |||
| } | |||
| } // namespace | |||
| void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_num_ = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num_ < kAddNInputsMinNum) { | |||
| MS_LOG(EXCEPTION) << "Input numbers should not less " << kAddNInputsMinNum << ", but got " << input_num_; | |||
| } | |||
| CheckParam(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| @@ -52,6 +61,8 @@ void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool AddNCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num_, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAddNOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat32) { | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| @@ -93,10 +104,6 @@ void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| MS_LOG(EXCEPTION) << "AddN input shapes must be equal."; | |||
| } | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/assignadd_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| @@ -20,13 +21,19 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kAssignAddInputsNum = 2; | |||
| constexpr size_t kAssignAddOutputsNum = 1; | |||
| } // namespace | |||
| void AssignAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| if (src1_shape.size() == 0 && src0_shape.size() == 0) { | |||
| src0_shape.insert(src0_shape.begin(), 1); | |||
| src1_shape.insert(src1_shape.begin(), 1); | |||
| (void)src0_shape.insert(src0_shape.begin(), 1); | |||
| (void)src1_shape.insert(src1_shape.begin(), 1); | |||
| } | |||
| if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { | |||
| MS_LOG(EXCEPTION) << "AssignAdd only support same dim input or tensor * scalar " << src0_shape.size() << " vs " | |||
| @@ -49,9 +56,8 @@ void AssignAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool AssignAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "AssignAdd error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kAssignAddInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAssignAddOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| @@ -59,7 +65,6 @@ bool AssignAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, c | |||
| auto ret = memcpy_s(inputs[0]->addr, inputs[0]->size, outputs[0]->addr, outputs[0]->size); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "Memcpy_s error, errorno " << ret; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/batch_norm_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| @@ -20,9 +21,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kBatchNormInputsNum = 5; | |||
| constexpr size_t kBatchNormOutputsNum = 5; | |||
| constexpr size_t kBatchNormInputShapeSize = 4; | |||
| constexpr size_t kBatchNormInputShapeSize2 = 2; | |||
| } // namespace | |||
| void BatchNormCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t type_size = sizeof(float); | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| size_t tensor_size = shape[1] * 2 * type_size; // [2, c] to store scale and bias | |||
| @@ -31,12 +38,13 @@ void BatchNormCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| void BatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| is_train = AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_training"); | |||
| momentum = AnfAlgo::GetNodeAttr<float>(kernel_node, "momentum"); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (x_shape.size() == 2) { | |||
| (void)x_shape.insert(x_shape.end(), 2, 1); // expand 2 dim: NC -> NCHW | |||
| } else if (x_shape.size() != 4) { | |||
| if (x_shape.size() == kBatchNormInputShapeSize2) { | |||
| (void)x_shape.insert(x_shape.end(), kBatchNormInputShapeSize - kBatchNormInputShapeSize2, 1); | |||
| } else if (x_shape.size() != kBatchNormInputShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Batchnorm only support nchw input!"; | |||
| } | |||
| batch_size = x_shape[0]; | |||
| @@ -67,9 +75,8 @@ void BatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool BatchNormCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 5 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kBatchNormInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kBatchNormOutputsNum, kernel_name_); | |||
| auto wksp = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto scale_ret = memcpy_s(wksp, workspace[0]->size, inputs[1]->addr, inputs[1]->size); | |||
| auto max_size = workspace[0]->size - inputs[1]->size; | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/batch_norm_grad_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| @@ -20,9 +21,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kBatchNormGradInputsNum = 6; | |||
| constexpr size_t kBatchNormGradOutputsNum = 3; | |||
| constexpr size_t kBatchNormGradInputShapeSize = 4; | |||
| constexpr size_t kBatchNormGradInputShapeSize2 = 2; | |||
| } // namespace | |||
| void BatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t type_size = sizeof(float); | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, Y_BACKPROP); | |||
| size_t tensor_size = shape[C] * SCALE_SHIFT_NUM * type_size; | |||
| @@ -35,6 +42,7 @@ void BatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (x_shape.size() == NC) { | |||
| (void)x_shape.insert(x_shape.end(), (NCHW - NC), 1); | |||
| @@ -76,10 +84,9 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool BatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| constexpr size_t INPUT_NUM = 5; | |||
| if (inputs.size() < INPUT_NUM || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kBatchNormGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kBatchNormGradOutputsNum, kernel_name_); | |||
| auto wksp_in = reinterpret_cast<float *>(workspace[SCALE_BIAS]->addr); | |||
| auto scale_ret = memcpy_s(wksp_in, workspace[SCALE_BIAS]->size, inputs[SCALE]->addr, inputs[SCALE]->size); | |||
| if (scale_ret != 0) { | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" | |||
| #include <string> | |||
| #include <algorithm> | |||
| @@ -22,12 +23,20 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kShapeSize2D = 2; | |||
| constexpr size_t kShapeSize4D = 4; | |||
| constexpr size_t kConv2dGradFilterInputsNum = 2; | |||
| constexpr size_t kConv2dGradFilterOutputsNum = 1; | |||
| } // namespace | |||
| void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||
| if (src_shape.size() != kShapeSize4D || weight_shape.size() != kShapeSize4D) { | |||
| MS_LOG(EXCEPTION) << ("Conv2d grad filter only support nchw input!"); | |||
| } | |||
| std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]}); | |||
| @@ -36,7 +45,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| if (src_shape[1] % group != 0) { | |||
| MS_LOG(EXCEPTION) << "Conv2d channels should be divided by group!"; | |||
| } | |||
| weight_shape.insert(weight_shape.begin(), group); | |||
| (void)weight_shape.insert(weight_shape.begin(), group); | |||
| weight_shape[1] = weight_shape[1] / group; | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| @@ -47,16 +56,19 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE); | |||
| auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION); | |||
| (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| if (dilation_ori.size() != 4) { | |||
| if (dilation_ori.size() != kShapeSize4D) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation must be 4d!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation only support 1 in N axis and C axis!"; | |||
| } | |||
| if (stride_ori.size() < kShapeSize2D) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel stride_ori should not less than 2d!"; | |||
| } | |||
| std::vector<int> stride{stride_ori[0], stride_ori[1]}; | |||
| std::vector<int> dilation{dilation_ori[2], dilation_ori[3]}; | |||
| dnnl::memory::dims strides{stride_ori[0], stride_ori[1]}; | |||
| @@ -91,9 +103,8 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool Conv2dGradFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kConv2dGradFilterInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConv2dGradFilterOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS, outputs[0]->addr); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" | |||
| #include <string> | |||
| #include <map> | |||
| @@ -23,13 +24,21 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kConv2dGradInputInputsNum = 2; | |||
| constexpr size_t kConv2dGradInputOutputsNum = 1; | |||
| constexpr size_t kShapeSize2D = 2; | |||
| constexpr size_t kShapeSize4D = 4; | |||
| const std::map<std::string, size_t> kFormatIndexMap = {{"NCHW", 2}, {"HWCN", 0}, {"NHWC", 1}}; | |||
| } // namespace | |||
| void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||
| if (src_shape.size() != kShapeSize4D || weight_shape.size() != kShapeSize4D) { | |||
| MS_LOG(EXCEPTION) << "Conv2d grad filter only support nchw input!"; | |||
| } | |||
| std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]}); | |||
| @@ -38,7 +47,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| if (src_shape[1] % group != 0) { | |||
| MS_LOG(EXCEPTION) << "Conv2d channels should be divided by group!"; | |||
| } | |||
| weight_shape.insert(weight_shape.begin(), group); | |||
| (void)weight_shape.insert(weight_shape.begin(), group); | |||
| weight_shape[1] = weight_shape[1] / group; | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| @@ -64,13 +73,15 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (dilation_ori.size() != 4) { | |||
| if (dilation_ori.size() != kShapeSize4D) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation must be 4d!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation only support 1 in N axis and C axis!"; | |||
| } | |||
| if (stride_ori.size() < kShapeSize2D) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel stride_ori should not less than 2d!"; | |||
| } | |||
| std::vector<int> stride{stride_ori[0], stride_ori[1]}; | |||
| std::vector<int> dilation{dilation_ori[2], dilation_ori[3]}; | |||
| dnnl::memory::dims strides{stride_ori[0], stride_ori[1]}; | |||
| @@ -105,9 +116,8 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool Conv2dGradInputCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kConv2dGradInputInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConv2dGradInputOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/conv_cpu_kernel.h" | |||
| #include <string> | |||
| #include <algorithm> | |||
| @@ -22,13 +23,17 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr size_t kConvInputTensorNum = 2; | |||
| namespace { | |||
| constexpr size_t kConvInputsNum = 2; | |||
| constexpr size_t kConvOutputsNum = 1; | |||
| constexpr size_t kShapeSize4D = 4; | |||
| constexpr size_t kShapeSize5D = 5; | |||
| constexpr size_t kKernelStartAxis = 2; | |||
| } // namespace | |||
| void ConvCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| @@ -59,9 +64,9 @@ void ConvCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, stride_attr); | |||
| auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, dilation_attr); | |||
| (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| if (stride_ori.size() != src_dim) { | |||
| MS_LOG(EXCEPTION) << "Conv stride size must be " << src_dim << "D!"; | |||
| } | |||
| @@ -111,9 +116,8 @@ void ConvCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool ConvCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < kConvInputTensorNum || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kConvInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConvOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV_CPU_KERNEL_H_ | |||
| @@ -35,7 +36,6 @@ class ConvCPUKernel : public MKLCPUKernel { | |||
| MS_REG_CPU_KERNEL(Conv2D, KernelAttr(), ConvCPUKernel); | |||
| MS_REG_CPU_KERNEL(Conv3D, KernelAttr(), ConvCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -23,8 +23,17 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kAvgPoolingGradInputsNum = 3; | |||
| constexpr size_t kAvgPoolingGradOutputsNum = 1; | |||
| constexpr size_t kAvgPoolingGradKernelSize = 4; | |||
| constexpr size_t kAvgPoolingGradStrideSize = 4; | |||
| constexpr size_t kAvgPoolingGradPadSize = 2; | |||
| } // namespace | |||
| void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| @@ -34,10 +43,10 @@ void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<int64_t> kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KERNEL_SIZE); | |||
| std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES); | |||
| (void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(origin_kernel_sizes), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (origin_kernel_sizes.size() != 4 || strides.size() != 4) { | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| if (origin_kernel_sizes.size() != kAvgPoolingGradKernelSize || strides.size() != kAvgPoolingGradStrideSize) { | |||
| MS_LOG(EXCEPTION) << "Invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size(); | |||
| } | |||
| std::vector<int> stride{strides[2], strides[3]}; | |||
| @@ -49,7 +58,7 @@ void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])}); | |||
| std::vector<int> dummy_dilation{1, 1}; | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r, dummy_dilation); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| if (int_padding_l.size() != kAvgPoolingGradPadSize || int_padding_r.size() != kAvgPoolingGradPadSize) { | |||
| MS_LOG(EXCEPTION) << "Pooling avg get padding failed"; | |||
| } | |||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||
| @@ -77,9 +86,8 @@ void AvgPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool AvgPoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 3 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Pooling avg grad error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kAvgPoolingGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAvgPoolingGradOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[2]->addr); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_AVG_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_AVG_GRAD_CPU_KERNEL_H_ | |||
| @@ -23,8 +23,17 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kMaxPoolingGradInputsNum = 3; | |||
| constexpr size_t kMaxPoolingGradOutputsNum = 1; | |||
| constexpr size_t kMaxPoolingGradKernelSize = 4; | |||
| constexpr size_t kMaxPoolingGradStrideSize = 4; | |||
| constexpr size_t kMaxPoolingGradInputShapeSize = 4; | |||
| } // namespace | |||
| void MaxPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<int> kernel_sizes; | |||
| @@ -32,10 +41,11 @@ void MaxPoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| auto kernel_sizes_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, KERNEL_SIZE); | |||
| auto strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES); | |||
| (void)std::transform(kernel_sizes_me.begin(), kernel_sizes_me.end(), std::back_inserter(kernel_sizes), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides), | |||
| [](const int64_t &value) { return static_cast<int>(value); }); | |||
| if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) { | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| if (kernel_sizes.size() != kMaxPoolingGradKernelSize || strides.size() != kMaxPoolingGradStrideSize || | |||
| src_shape_.size() != kMaxPoolingGradInputShapeSize || dst_shape_.size() != kMaxPoolingGradInputShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Pooling grad invalid input size!"; | |||
| } | |||
| std::vector<int> padding_r; | |||
| @@ -105,9 +115,8 @@ void MaxPoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float | |||
| bool MaxPoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 3 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Pooling grad error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaxPoolingGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaxPoolingGradOutputsNum, kernel_name_); | |||
| auto input = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto diff = reinterpret_cast<float *>(inputs[2]->addr); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_MAX_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_MAX_GRAD_CPU_KERNEL_H_ | |||
| @@ -174,35 +174,14 @@ | |||
| } \ | |||
| } | |||
| #define TRANSPOSE_MULTI_DIMS(TYPE, NAME) \ | |||
| int Transpose##NAME(const TYPE *in_data, TYPE *out_data, const int *strides, const int *out_strides, \ | |||
| const int *perm, const int *output_shape, int dims, int *size, int *position) { \ | |||
| if (size == NULL || position == NULL) { \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| *(size + dims - 1) = 1; \ | |||
| for (int i = dims - 1; i > 0; --i) { \ | |||
| *(size + i - 1) = *(size + i) * output_shape[i]; \ | |||
| } \ | |||
| for (int idx = 0; idx < (*size) * output_shape[0]; ++idx) { \ | |||
| int pos = idx; \ | |||
| int output_idx = 0; \ | |||
| int input_idx = 0; \ | |||
| for (int i = 0; i < dims; ++i) { \ | |||
| *(position + i) = pos / *(size + i); \ | |||
| int out_stride = i < dims - 1 ? out_strides[i] : 1; \ | |||
| output_idx += (*(position + i) * out_stride); \ | |||
| input_idx += (*(position + i) * strides[perm[i]]); \ | |||
| pos -= *(position + i) * (*(size + i)); \ | |||
| } \ | |||
| out_data[output_idx] = in_data[input_idx]; \ | |||
| } \ | |||
| return NNACL_OK; \ | |||
| } | |||
| #define TRANSPOSE_DIMS(TYPE, NAME) \ | |||
| void TransposeDims##NAME(const TYPE *in_data, TYPE *out_data, const int *output_shape, \ | |||
| const TransposeParameter *transpose_param, int task_id, int thread_num) { \ | |||
| NNACL_CHECK_NULL_RETURN_VOID(in_data); \ | |||
| NNACL_CHECK_NULL_RETURN_VOID(out_data); \ | |||
| NNACL_CHECK_NULL_RETURN_VOID(output_shape); \ | |||
| NNACL_CHECK_NULL_RETURN_VOID(transpose_param); \ | |||
| NNACL_CHECK_ZERO_RETURN(thread_num); \ | |||
| const int *perm = transpose_param->perm_; \ | |||
| const int *strides = transpose_param->strides_; \ | |||
| const int *out_strides = transpose_param->out_strides_; \ | |||
| @@ -220,6 +199,7 @@ | |||
| int output_idx = 0; \ | |||
| int input_idx = 0; \ | |||
| for (int i = 0; i < num_axes; ++i) { \ | |||
| NNACL_CHECK_ZERO_RETURN(*(out_strides + i)); \ | |||
| int position = pos / *(out_strides + i); \ | |||
| int out_stride = i < num_axes - 1 ? out_strides[i] : 1; \ | |||
| output_idx += (position * out_stride); \ | |||
| @@ -230,69 +210,48 @@ | |||
| } \ | |||
| } | |||
| #define DOTRANSPOSE(TYPE, NAME) \ | |||
| int DoTranspose##NAME(const TYPE *in_data, TYPE *out_data, const int *output_shape, \ | |||
| const TransposeParameter *transpose_param) { \ | |||
| if (in_data == NULL || out_data == NULL) { \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| const int *perm = transpose_param->perm_; \ | |||
| const int *strides = transpose_param->strides_; \ | |||
| const int *out_strides = transpose_param->out_strides_; \ | |||
| int data_size = transpose_param->data_num_ * sizeof(TYPE); \ | |||
| int num_axes = transpose_param->num_axes_; \ | |||
| bool needTranspose = false; \ | |||
| for (int i = 1; i < num_axes; ++i) { \ | |||
| if (perm[i] - perm[i - 1] != 1) { \ | |||
| needTranspose = true; \ | |||
| break; \ | |||
| } \ | |||
| } \ | |||
| if (!needTranspose) { \ | |||
| (void)memcpy(out_data, in_data, data_size); \ | |||
| return NNACL_OK; \ | |||
| } \ | |||
| for (int i = 0; i < num_axes; ++i) { \ | |||
| if (perm[i] < 0) { \ | |||
| return NNACL_PARAM_INVALID; \ | |||
| } \ | |||
| } \ | |||
| if (num_axes == 2) { \ | |||
| TransposeDim2##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 3) { \ | |||
| TransposeDim3##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 4) { \ | |||
| TransposeDim4##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 5) { \ | |||
| TransposeDim5##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 6) { \ | |||
| TransposeDim6##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else { \ | |||
| int *size = (int *)(malloc(num_axes * sizeof(int))); \ | |||
| if (size == NULL) { \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| int *position = (int *)(malloc(num_axes * sizeof(int))); \ | |||
| if (position == NULL) { \ | |||
| free(size); \ | |||
| size = NULL; \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| int ret = \ | |||
| Transpose##NAME(in_data, out_data, strides, out_strides, perm, output_shape, num_axes, size, position); \ | |||
| if (size != NULL) { \ | |||
| free(size); \ | |||
| size = NULL; \ | |||
| } \ | |||
| if (position != NULL) { \ | |||
| free(position); \ | |||
| position = NULL; \ | |||
| } \ | |||
| if (ret != NNACL_OK) { \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| } \ | |||
| return NNACL_OK; \ | |||
| #define DOTRANSPOSE(TYPE, NAME) \ | |||
| int DoTranspose##NAME(const TYPE *in_data, TYPE *out_data, const int *output_shape, \ | |||
| const TransposeParameter *transpose_param) { \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(in_data); \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(out_data); \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(output_shape); \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(transpose_param); \ | |||
| const int *perm = transpose_param->perm_; \ | |||
| const int *strides = transpose_param->strides_; \ | |||
| const int *out_strides = transpose_param->out_strides_; \ | |||
| int data_size = transpose_param->data_num_ * sizeof(TYPE); \ | |||
| int num_axes = transpose_param->num_axes_; \ | |||
| bool needTranspose = false; \ | |||
| for (int i = 1; i < num_axes; ++i) { \ | |||
| if (perm[i] - perm[i - 1] != 1) { \ | |||
| needTranspose = true; \ | |||
| break; \ | |||
| } \ | |||
| } \ | |||
| if (!needTranspose) { \ | |||
| (void)memcpy(out_data, in_data, data_size); \ | |||
| return NNACL_OK; \ | |||
| } \ | |||
| for (int i = 0; i < num_axes; ++i) { \ | |||
| if (perm[i] < 0) { \ | |||
| return NNACL_PARAM_INVALID; \ | |||
| } \ | |||
| } \ | |||
| if (num_axes == 2) { \ | |||
| TransposeDim2##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 3) { \ | |||
| TransposeDim3##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 4) { \ | |||
| TransposeDim4##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 5) { \ | |||
| TransposeDim5##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else if (num_axes == 6) { \ | |||
| TransposeDim6##NAME(in_data, out_data, strides, out_strides, perm, output_shape); \ | |||
| } else { \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| return NNACL_OK; \ | |||
| } | |||
| #define TRANSPOSE_TEMPLATE(TYPE, NAME) \ | |||
| @@ -301,7 +260,6 @@ | |||
| TRANSPOSE_FOUR_DIMS(TYPE, NAME) \ | |||
| TRANSPOSE_FIVE_DIMS(TYPE, NAME) \ | |||
| TRANSPOSE_SIX_DIMS(TYPE, NAME) \ | |||
| TRANSPOSE_MULTI_DIMS(TYPE, NAME) \ | |||
| TRANSPOSE_DIMS(TYPE, NAME) \ | |||
| DOTRANSPOSE(TYPE, NAME) | |||
| @@ -19,6 +19,9 @@ | |||
| #define UNSORTEDSEGMENTSUM(type, type1) \ | |||
| int UnsortedSegmentSum_##type##_##type1(const type *input, int unit_num, int input_dim1, const type1 *indices, \ | |||
| type *output, int output_dim0, int output_dim1) { \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(input); \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(indices); \ | |||
| NNACL_CHECK_NULL_RETURN_ERR(output); \ | |||
| if (input_dim1 == 0) { \ | |||
| return NNACL_ERR; \ | |||
| } \ | |||
| @@ -17,6 +17,9 @@ | |||
| #include "nnacl/base/unstack_base.h" | |||
| void Unstack(const void *input, void **output, const UnstackParameter *para, int data_size) { | |||
| NNACL_CHECK_NULL_RETURN_VOID(input); | |||
| NNACL_CHECK_NULL_RETURN_VOID(output); | |||
| NNACL_CHECK_NULL_RETURN_VOID(para); | |||
| const int8_t *in_addr = (int8_t *)input; | |||
| for (int j = 0; j < para->num_; j++) { | |||
| int8_t *out_addr = (int8_t *)output[j]; | |||
| @@ -175,6 +175,11 @@ void Fp16TransposeDim6(const float16_t *in_data, float16_t *out_data, const int | |||
| void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape, | |||
| const TransposeParameter *param, int task_id, int thread_num) { | |||
| NNACL_CHECK_NULL_RETURN_VOID(in_data); | |||
| NNACL_CHECK_NULL_RETURN_VOID(out_data); | |||
| NNACL_CHECK_NULL_RETURN_VOID(output_shape); | |||
| NNACL_CHECK_NULL_RETURN_VOID(param); | |||
| NNACL_CHECK_ZERO_RETURN(thread_num); | |||
| const int *perm = param->perm_; | |||
| const int *strides = param->strides_; | |||
| const int *out_strides = param->out_strides_; | |||
| @@ -192,6 +197,7 @@ void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int | |||
| int output_idx = 0; | |||
| int input_idx = 0; | |||
| for (int i = 0; i < num_axes; ++i) { | |||
| NNACL_CHECK_ZERO_RETURN(*(out_strides + i)); | |||
| int position = pos / *(out_strides + i); | |||
| int out_stride = i < num_axes - 1 ? out_strides[i] : 1; | |||
| output_idx += (position * out_stride); | |||
| @@ -204,9 +210,10 @@ void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int | |||
| int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape, | |||
| const TransposeParameter *param) { | |||
| if (in_data == NULL || out_data == NULL) { | |||
| return NNACL_ERR; | |||
| } | |||
| NNACL_CHECK_NULL_RETURN_ERR(in_data); | |||
| NNACL_CHECK_NULL_RETURN_ERR(out_data); | |||
| NNACL_CHECK_NULL_RETURN_ERR(output_shape); | |||
| NNACL_CHECK_NULL_RETURN_ERR(param); | |||
| const int *perm = param->perm_; | |||
| const int *strides = param->strides_; | |||
| const int *out_strides = param->out_strides_; | |||
| @@ -173,9 +173,11 @@ void TransposeDim6Fp32(const float *in_data, float *out_data, const int *strides | |||
| void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_shape, | |||
| const TransposeParameter *transpose_param, int task_id, int thread_num) { | |||
| if (thread_num == 0) { | |||
| return; | |||
| } | |||
| NNACL_CHECK_NULL_RETURN_VOID(in_data); | |||
| NNACL_CHECK_NULL_RETURN_VOID(out_data); | |||
| NNACL_CHECK_NULL_RETURN_VOID(output_shape); | |||
| NNACL_CHECK_NULL_RETURN_VOID(transpose_param); | |||
| NNACL_CHECK_ZERO_RETURN(thread_num); | |||
| int *perm = (int *)(transpose_param->perm_); | |||
| int *strides = (int *)(transpose_param->strides_); | |||
| int *out_strides = (int *)(transpose_param->out_strides_); | |||
| @@ -206,9 +208,10 @@ void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_ | |||
| int DoTransposeFp32(const float *in_data, float *out_data, const int *output_shape, | |||
| const TransposeParameter *transpose_param) { | |||
| if (in_data == NULL || out_data == NULL) { | |||
| return NNACL_ERR; | |||
| } | |||
| NNACL_CHECK_NULL_RETURN_ERR(in_data); | |||
| NNACL_CHECK_NULL_RETURN_ERR(out_data); | |||
| NNACL_CHECK_NULL_RETURN_ERR(output_shape); | |||
| NNACL_CHECK_NULL_RETURN_ERR(transpose_param); | |||
| int *perm = (int *)(transpose_param->perm_); | |||
| int *strides = (int *)(transpose_param->strides_); | |||
| int *out_strides = (int *)(transpose_param->out_strides_); | |||
| @@ -174,9 +174,10 @@ void TransposeDim6Int8(const int8_t *in_data, int8_t *out_data, const int *strid | |||
| int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape, | |||
| const TransposeParameter *transpose_param) { | |||
| if (in_data == NULL || out_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| NNACL_CHECK_NULL_RETURN_ERR(in_data); | |||
| NNACL_CHECK_NULL_RETURN_ERR(out_data); | |||
| NNACL_CHECK_NULL_RETURN_ERR(output_shape); | |||
| NNACL_CHECK_NULL_RETURN_ERR(transpose_param); | |||
| const int *perm = transpose_param->perm_; | |||
| const int *strides = transpose_param->strides_; | |||
| @@ -222,6 +223,11 @@ int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_s | |||
| void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape, | |||
| const TransposeParameter *transpose_param, int task_id, int thread_num) { | |||
| NNACL_CHECK_NULL_RETURN_VOID(in_data); | |||
| NNACL_CHECK_NULL_RETURN_VOID(out_data); | |||
| NNACL_CHECK_NULL_RETURN_VOID(output_shape); | |||
| NNACL_CHECK_NULL_RETURN_VOID(transpose_param); | |||
| NNACL_CHECK_ZERO_RETURN(thread_num); | |||
| const int *perm = transpose_param->perm_; | |||
| const int *strides = transpose_param->strides_; | |||
| const int *out_strides = transpose_param->out_strides_; | |||
| @@ -239,6 +245,7 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu | |||
| int output_idx = 0; | |||
| int input_idx = 0; | |||
| for (int i = 0; i < num_axes; ++i) { | |||
| NNACL_CHECK_ZERO_RETURN(*(out_strides + i)); | |||
| int position = pos / *(out_strides + i); | |||
| int out_stride = i < num_axes - 1 ? out_strides[i] : 1; | |||
| output_idx += (position * out_stride); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/print_cpu_kernel.h" | |||
| #include <algorithm> | |||
| #include "ir/tensor.h" | |||
| @@ -24,6 +25,7 @@ namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| void PrintCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_tensor_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| for (size_t i = 0; i < input_tensor_num; ++i) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); | |||
| @@ -51,7 +53,7 @@ bool PrintCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| } else { | |||
| ShapeVector shape; | |||
| (void)std::transform(input_shapes_[i].begin(), input_shapes_[i].end(), std::back_inserter(shape), | |||
| [](const size_t &value) { return static_cast<int64_t>(value); }); | |||
| [](const size_t &value) { return SizeToLong(value); }); | |||
| Tensor tensor(data_type, shape, inputs[i]->addr, input_sizes_[i] * sizeof(T)); | |||
| std::cout << tensor.ToStringNoLimit() << std::endl; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PRINT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PRINT_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -23,7 +23,11 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace ps { | |||
| constexpr size_t kEmbeddingLookUpProxyInputsNum = 2; | |||
| constexpr size_t kEmbeddingLookUpProxyOutputsNum = 1; | |||
| void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| EmbeddingLookUpCPUKernel::InitKernel(kernel_node); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| @@ -46,12 +50,12 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| key_ = AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey); | |||
| } | |||
| std::vector<float> values; | |||
| std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(values), | |||
| [](size_t dim) -> float { return SizeToFloat(dim); }); | |||
| std::transform(indices_shape.begin(), indices_shape.end(), std::back_inserter(values), | |||
| [](size_t dim) -> float { return SizeToFloat(dim); }); | |||
| std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(values), | |||
| [](size_t dim) -> float { return SizeToFloat(dim); }); | |||
| (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(values), | |||
| [](size_t dim) -> float { return SizeToFloat(dim); }); | |||
| (void)std::transform(indices_shape.begin(), indices_shape.end(), std::back_inserter(values), | |||
| [](size_t dim) -> float { return SizeToFloat(dim); }); | |||
| (void)std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(values), | |||
| [](size_t dim) -> float { return SizeToFloat(dim); }); | |||
| MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape | |||
| << ", indices_shape:" << indices_shape << ", output_shape:" << output_shape; | |||
| std::vector<int64_t> lens{SizeToLong(input_shape.size()), SizeToLong(indices_shape.size()), | |||
| @@ -66,12 +70,8 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "Inputs size is " << inputs.size() << ", but EmbeddingLookUpProxyKernel needs 2."; | |||
| } | |||
| if (outputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "Outputs size is " << outputs.size() << ", but EmbeddingLookUpProxyKernel needs 1."; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookUpProxyInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookUpProxyOutputsNum, kernel_name_); | |||
| auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t input_size = inputs[1]->size; | |||
| @@ -84,7 +84,6 @@ bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &i | |||
| auto ret = memcpy_s(lookup_ids.data(), lookup_ids.size() * sizeof(int), indices_addr, input_size); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Lookup id memcpy failed."; | |||
| return false; | |||
| } | |||
| mindspore::ps::Worker::GetInstance().DoPSEmbeddingLookup(key_, lookup_ids, &lookup_result, | |||
| mindspore::ps::kEmbeddingLookupCmd); | |||
| @@ -92,7 +91,6 @@ bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &i | |||
| auto ret2 = memcpy_s(output_addr, outputs[0]->size, lookup_result.data(), output_size); | |||
| if (ret2 != EOK) { | |||
| MS_LOG(EXCEPTION) << "Lookup result memcpy failed."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_ | |||
| @@ -93,15 +93,14 @@ void EmbeddingLookUpPSKernel::UpdateEmbeddings(float *embedding_table, const siz | |||
| size_t dest_len = copy_len; | |||
| for (size_t i = 0; i < ids_size; ++i) { | |||
| int index = SizeToInt(lookup_ids[i]) - LongToInt(offset_); | |||
| if (index >= 0 && index < SizeToInt(first_dim_size_)) { | |||
| auto ret = memcpy_s(embedding_table + IntToSize(index) * outer_dim_size_, dest_len, | |||
| update_vals + i * outer_dim_size_, copy_len); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed."; | |||
| } | |||
| } else { | |||
| if (index < 0 || index >= SizeToInt(first_dim_size_)) { | |||
| MS_LOG(EXCEPTION) << "UpdateEmbeddings index invalid."; | |||
| } | |||
| auto ret = memcpy_s(embedding_table + IntToSize(index) * outer_dim_size_, dest_len, | |||
| update_vals + i * outer_dim_size_, copy_len); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed."; | |||
| } | |||
| } | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -44,6 +44,7 @@ class PullKernel : public CPUKernel { | |||
| return true; | |||
| } | |||
| void Init(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(ERROR) << "Input number is " << input_num << ", but pull needs 2 inputs."; | |||
| @@ -49,7 +49,6 @@ class PushKernel : public CPUKernel { | |||
| auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, &key_, sizeof(size_t)); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Lookup id memcpy failed."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -27,6 +27,7 @@ constexpr size_t kSparseApplyAdamPSInputsShapeSize = 11; | |||
| void SparseApplyAdamPSKernel::InitKernel( | |||
| const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(shapes); | |||
| const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes; | |||
| if (shape_vec.size() < kSparseApplyAdamPSInputsShapeSize) { | |||
| @@ -68,7 +69,7 @@ void SparseApplyAdamPSKernel::InitKernel( | |||
| MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices"; | |||
| } | |||
| if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) { | |||
| use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov"); | |||
| use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV); | |||
| } | |||
| (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_); | |||
| (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_); | |||
| @@ -79,7 +80,7 @@ void SparseApplyAdamPSKernel::InitKernel( | |||
| void SparseApplyAdamPSKernel::ReInit(const std::vector<std::vector<size_t>> &shapes) { | |||
| if (shapes.empty() || shapes[0].empty()) { | |||
| MS_LOG(EXCEPTION) << "Shape should not empty"; | |||
| MS_LOG(EXCEPTION) << "Shape is empty"; | |||
| } | |||
| const std::vector<size_t> &indices_shape = shapes[0]; | |||
| indices_size_ = indices_shape[0]; | |||
| @@ -24,6 +24,7 @@ constexpr size_t kSparseApplyFtrlPSInputSize = 5; | |||
| void SparseApplyFtrlPSKernel::InitKernel( | |||
| const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(shapes); | |||
| const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes; | |||
| if (shape_vec.size() < kSparseApplyFtrlPSInputSize) { | |||
| @@ -46,7 +46,7 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK | |||
| protected: | |||
| void ReInit(const std::vector<AddressPtr> &) override; | |||
| float init_accum_; | |||
| float init_accum_{0.1}; | |||
| }; | |||
| } // namespace ps | |||
| } // namespace kernel | |||
| @@ -23,14 +23,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace ps { | |||
| constexpr size_t kSparseApplyLazyAdamPSInputSize = 5; | |||
| constexpr size_t kSparseApplyLazyAdamPSInputsSize = 11; | |||
| void SparseApplyLazyAdamPSKernel::InitKernel( | |||
| const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(shapes); | |||
| const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes; | |||
| if (shape_vec.size() < kSparseApplyLazyAdamPSInputSize) { | |||
| MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernel needs " << kSparseApplyLazyAdamPSInputSize | |||
| if (shape_vec.size() < kSparseApplyLazyAdamPSInputsSize) { | |||
| MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernel needs " << kSparseApplyLazyAdamPSInputsSize | |||
| << " input shapes, but got " << shape_vec.size(); | |||
| } | |||
| std::vector<size_t> &var_shape = *(shape_vec[0]); | |||
| @@ -70,7 +71,7 @@ void SparseApplyLazyAdamPSKernel::InitKernel( | |||
| MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices"; | |||
| } | |||
| if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) { | |||
| use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov"); | |||
| use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV); | |||
| } | |||
| (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_); | |||
| (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_); | |||
| @@ -89,6 +90,10 @@ void SparseApplyLazyAdamPSKernel::ReInit(const std::vector<std::vector<size_t>> | |||
| } | |||
| void SparseApplyLazyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) { | |||
| if (inputs.size() < kSparseApplyLazyAdamPSInputsSize) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should not less than " << kSparseApplyLazyAdamPSInputsSize << ", but got " | |||
| << inputs.size(); | |||
| } | |||
| const auto &indices_addr = inputs[10]; | |||
| indices_size_ = indices_addr->size / sizeof(int); | |||
| workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_; | |||
| @@ -20,6 +20,13 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kUniformIntInputsNum = 3; | |||
| constexpr size_t kUniformRealInputsNum = 1; | |||
| constexpr size_t kUniformIntOutputsNum = 1; | |||
| constexpr size_t kUniformRealOutputsNum = 1; | |||
| constexpr size_t kStandardNormalOutputsNum = 1; | |||
| } // namespace | |||
| void StandardNormal(float *output, std::normal_distribution<float> distribution, | |||
| std::default_random_engine random_generator, size_t start, size_t end) { | |||
| for (size_t i = start; i < end; i++) { | |||
| @@ -60,12 +67,6 @@ void LaunchStandardNormal(unsigned int seed, const std::vector<AddressPtr> &outp | |||
| void LaunchUniformInt(unsigned int seed, const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| if (inputs.size() != 3) { | |||
| MS_LOG(EXCEPTION) << "Expect input number 3, actual got input number " << inputs.size(); | |||
| } | |||
| if (outputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "Expect output number 1, actual got output number " << outputs.size(); | |||
| } | |||
| // Init min/max values. | |||
| int min_val = reinterpret_cast<int *>(inputs[1]->addr)[0]; | |||
| int max_val = reinterpret_cast<int *>(inputs[2]->addr)[0]; | |||
| @@ -75,7 +76,6 @@ void LaunchUniformInt(unsigned int seed, const std::vector<AddressPtr> &inputs, | |||
| // Init output address. | |||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| // Init sample number. | |||
| size_t num_sample = outputs[0]->size / sizeof(int); | |||
| @@ -92,15 +92,8 @@ void LaunchUniformInt(unsigned int seed, const std::vector<AddressPtr> &inputs, | |||
| void LaunchUniformReal(unsigned int seed, const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| if (inputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "Expect input number 1, actual got input number " << inputs.size(); | |||
| } | |||
| if (outputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "Expect output number 1, actual got output number " << outputs.size(); | |||
| } | |||
| // Init output address. | |||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| // Init sample number. | |||
| size_t num_sample = outputs[0]->size / sizeof(int); | |||
| @@ -117,24 +110,14 @@ void LaunchUniformReal(unsigned int seed, const std::vector<AddressPtr> &inputs, | |||
| void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kRandomOpTypeMap.find(kernel_name); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto iter = kRandomOpTypeMap.find(kernel_name_); | |||
| if (iter == kRandomOpTypeMap.end()) { | |||
| MS_LOG(EXCEPTION) << "Random operation " << kernel_name << " is not supported."; | |||
| MS_LOG(EXCEPTION) << "Random operation " << kernel_name_ << " is not supported."; | |||
| } else { | |||
| random_op_type_ = iter->second; | |||
| } | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if ((random_op_type_ == RANDOM_OP_NORMAL) && input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but random op needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but random op needs 1 output."; | |||
| } | |||
| seed_ = LongToInt(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed"))); | |||
| seed2_ = LongToInt(GetValue<int64_t>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"))); | |||
| } | |||
| @@ -152,10 +135,15 @@ bool RandomCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, cons | |||
| } | |||
| if (random_op_type_ == RANDOM_OP_NORMAL) { | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kStandardNormalOutputsNum, kernel_name_); | |||
| LaunchStandardNormal(RNG_seed, outputs); | |||
| } else if (random_op_type_ == RANDOM_OP_UNIFORM_INT) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kUniformIntInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kUniformIntOutputsNum, kernel_name_); | |||
| LaunchUniformInt(RNG_seed, inputs, outputs); | |||
| } else if (random_op_type_ == RANDOM_OP_UNIFORM_REAL) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kUniformRealInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kUniformRealOutputsNum, kernel_name_); | |||
| LaunchUniformReal(RNG_seed, inputs, outputs); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported."; | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/reduce_scatter_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "runtime/device/cpu/mpi/mpi_interface.h" | |||
| @@ -22,12 +23,15 @@ namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr auto kRanksGroup = "group"; | |||
| constexpr size_t kReduceScatterInputsNum = 1; | |||
| constexpr size_t kReduceScatterOutputsNum = 1; | |||
| } // namespace | |||
| ReduceScatterCPUKernel::ReduceScatterCPUKernel() : op_type_(kMPIOpTypeSum) {} | |||
| void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| auto op = primitive->GetAttr("op"); | |||
| @@ -46,8 +50,10 @@ void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool ReduceScatterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReduceScatterInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReduceScatterOutputsNum, kernel_name_); | |||
| auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto output_data_num = outputs[0]->size / sizeof(float); | |||
| return MPIReduceScatter(input_addr, output_addr, ranks_group_, output_data_num, op_type_); | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_SCATTER_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -19,37 +19,41 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kReshapeInputsNum = 1; | |||
| constexpr size_t kReshapeOutputsNum = 1; | |||
| } // namespace | |||
| void ReshapeCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| node_wpt_ = kernel_node; | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| type_size_ = GetTypeByte(TypeIdToType(x_data_type_)); | |||
| } | |||
| bool ReshapeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto node_ = node_wpt_.lock(); | |||
| if (!node_) { | |||
| MS_LOG(EXCEPTION) << "node_wpt_ is expired."; | |||
| } | |||
| auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Input or output empty!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReshapeInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReshapeOutputsNum, kernel_name_); | |||
| if (inputs[0]->size != outputs[0]->size) { | |||
| return false; | |||
| } | |||
| if (inputs[0]->addr == outputs[0]->addr) { | |||
| return true; | |||
| } | |||
| auto node = node_wpt_.lock(); | |||
| if (!node) { | |||
| MS_LOG(EXCEPTION) << "node_wpt_ is expired."; | |||
| } | |||
| auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); | |||
| size_t mem_bits = type_size_; | |||
| for (size_t i = 0; i < x_shape.size(); ++i) { | |||
| mem_bits *= x_shape[i]; | |||
| } | |||
| auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits); | |||
| if (ret != 0) { | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "memcpy_s error, errno: " << ret; | |||
| } | |||
| return true; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2021-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,24 +21,26 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kResizeBilinearInputSize = 4; | |||
| constexpr size_t kResizeBilinearInputsNum = 1; | |||
| constexpr size_t kResizeBilinearOutputsNum = 1; | |||
| constexpr size_t kResizeBilinearInputsShapeSize = 4; | |||
| constexpr size_t kResizeBilinearAttrSize = 2; | |||
| } // namespace | |||
| void ResizeBilinearCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| size_ = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE); | |||
| align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (shape_.size() < kResizeBilinearInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeBilinearInputSize << ", but got " << shape_.size(); | |||
| if (shape_.size() != kResizeBilinearInputsShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeBilinearInputsShapeSize << ", but got " | |||
| << shape_.size(); | |||
| } | |||
| if (size_.size() < kResizeBilinearAttrSize) { | |||
| MS_LOG(EXCEPTION) << "Attr SIZE shape size should be " << kResizeBilinearAttrSize << ", but got " << size_.size(); | |||
| if (size_.size() != kResizeBilinearAttrSize) { | |||
| MS_LOG(EXCEPTION) << "Size attr requires " << kResizeBilinearAttrSize << " elements, but got " << size_.size(); | |||
| } | |||
| size_t in_height = shape_[2]; | |||
| size_t in_width = shape_[3]; | |||
| size_t out_height = size_[0]; | |||
| @@ -50,6 +52,8 @@ void ResizeBilinearCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool ResizeBilinearCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeBilinearInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeBilinearOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16, float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -62,10 +66,9 @@ bool ResizeBilinearCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu | |||
| template <typename T1, typename T2> | |||
| void ResizeBilinearCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto input_addr = reinterpret_cast<T1 *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<T2 *>(outputs[0]->addr); | |||
| const std::vector<AddressPtr> &outputs) const { | |||
| const auto *input_addr = reinterpret_cast<T1 *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<T2 *>(outputs[0]->addr); | |||
| size_t batch_size = shape_[0]; | |||
| size_t channel = shape_[1]; | |||
| size_t in_height = shape_[2]; | |||
| @@ -84,7 +87,6 @@ void ResizeBilinearCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs | |||
| std::vector<CachedInterpolation> ys(out_height + 1); | |||
| std::vector<CachedInterpolation> xs(out_width + 1); | |||
| ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data()); | |||
| ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data()); | |||
| @@ -111,16 +113,5 @@ void ResizeBilinearCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs | |||
| } | |||
| } | |||
| } | |||
| void ResizeBilinearCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinear needs 1 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinear expects 1 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -36,11 +36,10 @@ class ResizeBilinearCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T1, typename T2> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| bool align_corners_{false}; | |||
| float height_scale{1.0}; | |||
| @@ -21,23 +21,25 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kResizeBilinearGradInput0Size = 4; | |||
| constexpr size_t kResizeBilinearGradInput1Size = 4; | |||
| constexpr size_t kResizeBilinearGradInputsNum = 2; | |||
| constexpr size_t kResizeBilinearGradOutputNum = 1; | |||
| constexpr size_t kResizeBilinearGradInputsDoutShapeSize = 4; | |||
| constexpr size_t kResizeBilinearGradInputsXShapeSize = 4; | |||
| } // namespace | |||
| void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| size_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (shape_.size() < kResizeBilinearGradInput0Size) { | |||
| MS_LOG(EXCEPTION) << "Input_0 shape size should be " << kResizeBilinearGradInput0Size << ", but got " | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| if (shape_.size() < kResizeBilinearGradInputsDoutShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Input dout shape size should not be less than " << kResizeBilinearGradInputsDoutShapeSize << ", but got " | |||
| << shape_.size(); | |||
| } | |||
| if (size_.size() < kResizeBilinearGradInput1Size) { | |||
| MS_LOG(EXCEPTION) << "Input_1 shape size should be " << kResizeBilinearGradInput1Size << ", but got " | |||
| if (size_.size() < kResizeBilinearGradInputsXShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Input x shape size should not be less than " << kResizeBilinearGradInputsXShapeSize << ", but got " | |||
| << size_.size(); | |||
| } | |||
| @@ -45,7 +47,6 @@ void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| size_t in_width = shape_[3]; | |||
| size_t out_height = size_[2]; | |||
| size_t out_width = size_[3]; | |||
| height_scale = Scaling(out_height, in_height, align_corners_); | |||
| width_scale = Scaling(out_width, in_width, align_corners_); | |||
| } | |||
| @@ -53,6 +54,8 @@ void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool ResizeBilinearGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeBilinearGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeBilinearGradOutputNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -65,9 +68,9 @@ bool ResizeBilinearGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> & | |||
| template <typename T> | |||
| void ResizeBilinearGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto dloss_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const std::vector<AddressPtr> &outputs) const { | |||
| const auto *dloss_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size); | |||
| if (ret != EOK) { | |||
| @@ -111,16 +114,5 @@ void ResizeBilinearGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &in | |||
| } | |||
| } | |||
| } | |||
| void ResizeBilinearGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinearGrad needs 2 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinear Gradexpects 1 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -36,15 +36,14 @@ class ResizeBilinearGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| bool align_corners_ = false; | |||
| float height_scale = 1.; | |||
| float width_scale = 1.; | |||
| bool align_corners_{false}; | |||
| float height_scale{1.0}; | |||
| float width_scale{1.0}; | |||
| std::vector<size_t> size_; | |||
| std::vector<size_t> shape_; | |||
| }; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,24 +21,26 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kResizeNearestNeighborInputSize = 4; | |||
| constexpr size_t kResizeNearestNeighborOutputSize = 2; | |||
| constexpr size_t kResizeNearestNeighborInputsNum = 1; | |||
| constexpr size_t kResizeNearestNeighborOutputNum = 1; | |||
| constexpr size_t kResizeNearestNeighborInputsShapeSize = 4; | |||
| constexpr size_t kResizeNearestNeighborAttrSize = 2; | |||
| } // namespace | |||
| void ResizeNearestNeighborCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<int64_t> output_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, SIZE); | |||
| align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (input_shape.size() < kResizeNearestNeighborInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input_0 shape size should be " << kResizeNearestNeighborInputSize << ", but got " | |||
| if (input_shape.size() != kResizeNearestNeighborInputsShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeNearestNeighborInputsShapeSize << ", but got " | |||
| << input_shape.size(); | |||
| } | |||
| if (output_size.size() < kResizeNearestNeighborOutputSize) { | |||
| MS_LOG(EXCEPTION) << "Output shape size should be " << kResizeNearestNeighborOutputSize << ", but got " | |||
| << output_size.size(); | |||
| if (output_size.size() != kResizeNearestNeighborAttrSize) { | |||
| MS_LOG(EXCEPTION) << "Size attr should be " << kResizeNearestNeighborAttrSize << ", but got " << output_size.size(); | |||
| } | |||
| batch_size_ = input_shape[0]; | |||
| @@ -55,6 +57,8 @@ void ResizeNearestNeighborCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool ResizeNearestNeighborCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeNearestNeighborInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeNearestNeighborOutputNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -74,8 +78,8 @@ bool ResizeNearestNeighborCPUKernel::Launch(const std::vector<kernel::AddressPtr | |||
| template <typename T> | |||
| void ResizeNearestNeighborCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| if (out_height_ == in_height_ && out_width_ == in_width_) { | |||
| for (size_t i = 0; i < output_size_; ++i) { | |||
| @@ -99,16 +103,5 @@ void ResizeNearestNeighborCPUKernel::LaunchKernel(const std::vector<AddressPtr> | |||
| output_addr[i] = input_addr[input_pos]; | |||
| } | |||
| } | |||
| void ResizeNearestNeighborCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinear needs 1 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinear expects 1 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -36,11 +36,10 @@ class ResizeNearestNeighborCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| bool align_corners_{false}; | |||
| size_t batch_size_{0}; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,23 +21,27 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kResizeNearestNeighborGradInputSize = 4; | |||
| constexpr size_t kResizeNearestNeighborGradOutputSize = 4; | |||
| constexpr size_t kResizeNearestNeighborGradInputsNum = 1; | |||
| constexpr size_t kResizeNearestNeighborGradOutputNum = 1; | |||
| constexpr size_t kResizeNearestNeighborGradInputsShapeSize = 4; | |||
| constexpr size_t kResizeNearestNeighborGradOutputsShapeSize = 4; | |||
| } // namespace | |||
| void ResizeNearestNeighborGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> output_size = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| align_corners_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "align_corners"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (input_shape.size() < kResizeNearestNeighborGradInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input_0 shape size should be " << kResizeNearestNeighborGradInputSize << ", but got " | |||
| dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); | |||
| if (input_shape.size() != kResizeNearestNeighborGradInputsShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should be " << kResizeNearestNeighborGradInputsShapeSize << ", but got " | |||
| << input_shape.size(); | |||
| } | |||
| if (output_size.size() < kResizeNearestNeighborGradOutputSize) { | |||
| MS_LOG(EXCEPTION) << "Output shape size should be " << kResizeNearestNeighborGradOutputSize << ", but got " | |||
| if (output_size.size() != kResizeNearestNeighborGradOutputsShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Output shape size should be " << kResizeNearestNeighborGradOutputsShapeSize << ", but got " | |||
| << output_size.size(); | |||
| } | |||
| @@ -54,6 +58,8 @@ void ResizeNearestNeighborGradCPUKernel::InitKernel(const CNodePtr &kernel_node) | |||
| bool ResizeNearestNeighborGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kResizeNearestNeighborGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kResizeNearestNeighborGradOutputNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -73,9 +79,8 @@ bool ResizeNearestNeighborGradCPUKernel::Launch(const std::vector<kernel::Addres | |||
| template <typename T> | |||
| void ResizeNearestNeighborGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto dloss_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const auto *dloss_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Output buffer memset failed, ret:" << ret; | |||
| @@ -83,7 +88,6 @@ void ResizeNearestNeighborGradCPUKernel::LaunchKernel(const std::vector<AddressP | |||
| size_t in_hw_size = in_width_ * in_height_; | |||
| size_t out_hw_size = out_width_ * out_height_; | |||
| for (size_t b = 0; b < batch_size_; ++b) { | |||
| for (size_t c = 0; c < channel_; ++c) { | |||
| for (size_t h = 0; h < in_height_; ++h) { | |||
| @@ -102,16 +106,5 @@ void ResizeNearestNeighborGradCPUKernel::LaunchKernel(const std::vector<AddressP | |||
| } | |||
| } | |||
| } | |||
| void ResizeNearestNeighborGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinearGrad needs 1 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "ResizeBilinear Gradexpects 1 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -36,11 +36,10 @@ class ResizeNearestNeighborGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| bool align_corners_{false}; | |||
| size_t batch_size_{0}; | |||
| @@ -21,6 +21,11 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kCenteredRMSPropInputsNum = 9; | |||
| constexpr size_t kRMSPropInputsNum = 5; | |||
| } // namespace | |||
| template <typename T> | |||
| void RMSPropCPUKernel<T>::LaunchRMSPropUnuseCenter(T *variable, T *mean_square, T *moment, T *gradients, | |||
| float *learning_rate) { | |||
| @@ -71,6 +76,7 @@ void RMSPropCPUKernel<T>::LaunchRMSPropUseCenter(T *variable, T *mean_square, T | |||
| template <typename T> | |||
| void RMSPropCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto node_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (node_name == "ApplyCenteredRMSProp") { | |||
| use_center_ = true; | |||
| @@ -92,6 +98,7 @@ template <typename T> | |||
| bool RMSPropCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| if (!use_center_) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kRMSPropInputsNum, kernel_name_); | |||
| float *variable = reinterpret_cast<float *>(inputs[0]->addr); | |||
| float *mean_square = reinterpret_cast<float *>(inputs[1]->addr); | |||
| float *moment = reinterpret_cast<float *>(inputs[2]->addr); | |||
| @@ -102,6 +109,7 @@ bool RMSPropCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| MS_LOG(INFO) << "RMSPropCPUKernel lens:" << lens << " size_:" << size_; | |||
| LaunchRMSPropUnuseCenter(variable, mean_square, moment, gradients, learning_rate); | |||
| } else { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kCenteredRMSPropInputsNum, kernel_name_); | |||
| T *variable = reinterpret_cast<float *>(inputs[0]->addr); | |||
| T *mean_gradients = reinterpret_cast<float *>(inputs[1]->addr); | |||
| T *mean_square = reinterpret_cast<float *>(inputs[2]->addr); | |||
| @@ -27,7 +27,7 @@ namespace kernel { | |||
| template <typename T> | |||
| class RMSPropCPUKernel : public CPUKernel { | |||
| public: | |||
| RMSPropCPUKernel() {} | |||
| RMSPropCPUKernel() = default; | |||
| ~RMSPropCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| @@ -22,15 +22,34 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kInputNum = 3; | |||
| constexpr size_t kOutputNum = 1; | |||
| constexpr size_t kScatterArithmeticInputsNum = 3; | |||
| constexpr size_t kScatterArithmeticOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::InitComputeFunc() { | |||
| static const std::map<std::string, TypeComputeFunc> scatterArithmeticFuncMap{ | |||
| {prim::kPrimScatterAdd->name(), &ScatterArithmeticCPUKernel<T>::ScatterAdd}, | |||
| {prim::kPrimScatterSub->name(), &ScatterArithmeticCPUKernel<T>::ScatterSub}, | |||
| {prim::kPrimScatterMul->name(), &ScatterArithmeticCPUKernel<T>::ScatterMul}, | |||
| {prim::kPrimScatterDiv->name(), &ScatterArithmeticCPUKernel<T>::ScatterDiv}, | |||
| {prim::kPrimScatterMax->name(), &ScatterArithmeticCPUKernel<T>::ScatterMax}, | |||
| {prim::kPrimScatterMin->name(), &ScatterArithmeticCPUKernel<T>::ScatterMin}, | |||
| {prim::kPrimScatterUpdate->name(), &ScatterArithmeticCPUKernel<T>::ScatterUpdate}}; | |||
| if (scatterArithmeticFuncMap.find(kernel_name_) == scatterArithmeticFuncMap.end()) { | |||
| MS_LOG(EXCEPTION) << "ScatterArithmeticCPUKernel does not support " << kernel_name_; | |||
| } | |||
| compute_func_ = scatterArithmeticFuncMap.at(kernel_name_); | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should not less than 1"; | |||
| } | |||
| input_size_ = 1; | |||
| inner_size_ = 1; | |||
| if (input_shape.empty()) { | |||
| @@ -46,52 +65,30 @@ void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| for (size_t i = 0; i < indices_shape.size(); i++) { | |||
| indices_size_ *= indices_shape[i]; | |||
| } | |||
| } | |||
template <typename T>
// Validates the node's input/output arity and then initializes the dispatch
// function. Called from InitKernel before any shape information is read.
// NOTE(review): the error messages hard-code "ScatterAdd" and the literals 3/1
// although this kernel also serves ScatterSub/Mul/Div/Max/Min/Update — consider
// reporting the actual node name and the kInputNum/kOutputNum constants.
// NOTE(review): InitComputeFunc() mutates compute_func_, yet this member is
// declared const — looks like a leftover of an in-flight refactor; confirm.
void ScatterArithmeticCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) const {
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != kInputNum) {
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterAdd needs 3 inputs.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != kOutputNum) {
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterAdd has 1 output.";
  }
  InitComputeFunc();
}
| template <typename T> | |||
| bool ScatterArithmeticCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| static const std::map<std::string, std::function<void(ScatterArithmeticCPUKernel *, T *, const int *, const T *)>> | |||
| kScatterArithmeticBinOpFuncMap{{"ScatterAdd", &ScatterArithmeticCPUKernel<T>::ScatterAdd}, | |||
| {"ScatterSub", &ScatterArithmeticCPUKernel<T>::ScatterSub}, | |||
| {"ScatterMul", &ScatterArithmeticCPUKernel<T>::ScatterMul}, | |||
| {"ScatterDiv", &ScatterArithmeticCPUKernel<T>::ScatterDiv}, | |||
| {"ScatterMax", &ScatterArithmeticCPUKernel<T>::ScatterMax}, | |||
| {"ScatterMin", &ScatterArithmeticCPUKernel<T>::ScatterMin}, | |||
| {"ScatterUpdate", &ScatterArithmeticCPUKernel<T>::ScatterUpdate}}; | |||
| if (kScatterArithmeticBinOpFuncMap.find(kernel_name_) != kScatterArithmeticBinOpFuncMap.end()) { | |||
| T *input = reinterpret_cast<T *>(inputs[INPUT]->addr); | |||
| int *indices = reinterpret_cast<int *>(inputs[INDICES]->addr); | |||
| T *updates = reinterpret_cast<T *>(inputs[UPDATES]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| kScatterArithmeticBinOpFuncMap.at(kernel_name_)(this, input, indices, updates); | |||
| auto bufferSize = outputs[0]->size; | |||
| auto ret = memcpy_s(output, bufferSize, input, input_size_ * sizeof(T)); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Memory copy failed!"; | |||
| } | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Not support operator:" << kernel_name_; | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kScatterArithmeticInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kScatterArithmeticOutputsNum, kernel_name_); | |||
| auto *input = reinterpret_cast<T *>(inputs[INPUT_INDEX_]->addr); | |||
| auto *indices = reinterpret_cast<int *>(inputs[INDICES_INDEX_]->addr); | |||
| auto *updates = reinterpret_cast<T *>(inputs[UPDATES_INDEX_]->addr); | |||
| auto *output = reinterpret_cast<T *>(outputs[OUTPUT_INDEX_]->addr); | |||
| compute_func_(this, input, indices, updates); | |||
| auto bufferSize = outputs[OUTPUT_INDEX_]->size; | |||
| auto ret = memcpy_s(output, bufferSize, input, input_size_ * sizeof(T)); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Memory copy failed!"; | |||
| } | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| auto base_index_updates = i * inner_size_; | |||
| auto base_index_input = indices[i] * inner_size_; | |||
| @@ -102,7 +99,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterAdd(T *input, const int *indices, con | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| auto base_index_updates = i * inner_size_; | |||
| auto base_index_input = indices[i] * inner_size_; | |||
| @@ -113,7 +110,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterSub(T *input, const int *indices, con | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| auto base_index_updates = i * inner_size_; | |||
| auto base_index_input = indices[i] * inner_size_; | |||
| @@ -124,32 +121,32 @@ void ScatterArithmeticCPUKernel<T>::ScatterMul(T *input, const int *indices, con | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterDiv(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterDiv(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| for (size_t j = 0; j < inner_size_; j++) { | |||
| auto dividend = input[indices[i] * inner_size_ + j]; | |||
| auto divisor = updates[i * inner_size_ + j]; | |||
| if (divisor == 0) { | |||
| if (dividend == 0) { | |||
| input[indices[i] * inner_size_ + j] = std::numeric_limits<T>::quiet_NaN(); | |||
| continue; | |||
| } | |||
| if (std::numeric_limits<T>::has_infinity) { | |||
| input[indices[i] * inner_size_ + j] = | |||
| dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); | |||
| } else { | |||
| input[indices[i] * inner_size_ + j] = | |||
| dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); | |||
| } | |||
| if (divisor != 0) { | |||
| input[indices[i] * inner_size_ + j] = dividend / divisor; | |||
| continue; | |||
| } | |||
| if (dividend == 0) { | |||
| input[indices[i] * inner_size_ + j] = std::numeric_limits<T>::quiet_NaN(); | |||
| continue; | |||
| } | |||
| input[indices[i] * inner_size_ + j] = dividend / divisor; | |||
| if (std::numeric_limits<T>::has_infinity) { | |||
| input[indices[i] * inner_size_ + j] = | |||
| dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity(); | |||
| } else { | |||
| input[indices[i] * inner_size_ + j] = | |||
| dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min(); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| auto base_index_updates = i * inner_size_; | |||
| auto base_index_input = indices[i] * inner_size_; | |||
| @@ -162,7 +159,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterMax(T *input, const int *indices, con | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| auto base_index_updates = i * inner_size_; | |||
| auto base_index_input = indices[i] * inner_size_; | |||
| @@ -175,7 +172,7 @@ void ScatterArithmeticCPUKernel<T>::ScatterMin(T *input, const int *indices, con | |||
| } | |||
| template <typename T> | |||
| void ScatterArithmeticCPUKernel<T>::ScatterUpdate(T *input, const int *indices, const T *updates) { | |||
| void ScatterArithmeticCPUKernel<T>::ScatterUpdate(T *input, const int *indices, const T *updates) const { | |||
| for (size_t i = 0; i < indices_size_; i++) { | |||
| auto base_index_updates = i * inner_size_; | |||
| auto base_index_input = indices[i] * inner_size_; | |||
| @@ -37,27 +37,25 @@ class ScatterArithmeticCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node) const; | |||
| void ScatterAdd(T *input, const int *indices, const T *updates); | |||
| void ScatterSub(T *input, const int *indices, const T *updates); | |||
| void ScatterMul(T *input, const int *indices, const T *updates); | |||
| void ScatterDiv(T *input, const int *indices, const T *updates); | |||
| void ScatterMax(T *input, const int *indices, const T *updates); | |||
| void ScatterMin(T *input, const int *indices, const T *updates); | |||
| void ScatterUpdate(T *input, const int *indices, const T *updates); | |||
| size_t input_size_{1}; | |||
| size_t inner_size_{1}; | |||
| size_t indices_size_{1}; | |||
| std::string kernel_name_; | |||
| enum input_list_ { INPUT, INDICES, UPDATES }; | |||
| void InitComputeFunc(); | |||
| void ScatterAdd(T *input, const int *indices, const T *updates) const; | |||
| void ScatterSub(T *input, const int *indices, const T *updates) const; | |||
| void ScatterMul(T *input, const int *indices, const T *updates) const; | |||
| void ScatterDiv(T *input, const int *indices, const T *updates) const; | |||
| void ScatterMax(T *input, const int *indices, const T *updates) const; | |||
| void ScatterMin(T *input, const int *indices, const T *updates) const; | |||
| void ScatterUpdate(T *input, const int *indices, const T *updates) const; | |||
| using TypeComputeFunc = std::function<void(ScatterArithmeticCPUKernel *, T *, const int *, const T *)>; | |||
| TypeComputeFunc compute_func_; | |||
| size_t input_size_{0}; | |||
| size_t inner_size_{0}; | |||
| size_t indices_size_{0}; | |||
| const size_t INPUT_INDEX_{0}; | |||
| const size_t INDICES_INDEX_{1}; | |||
| const size_t UPDATES_INDEX_{2}; | |||
| const size_t OUTPUT_INDEX_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(ScatterAdd, | |||
| @@ -22,14 +22,21 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kScatterNdUpdateInputsNum = 3; | |||
| constexpr size_t kScatterNdUpdateOutputsNum = 1; | |||
| constexpr size_t kMinIndiceRank = 2; | |||
| template <typename T> | |||
| void Compute(const ComputeParams<T> *params, const size_t start, const size_t end) { | |||
| MS_EXCEPTION_IF_NULL(params); | |||
| T *x = params->x_; | |||
| int *indices = params->indices_; | |||
| T *updates = params->updates_; | |||
| std::vector<int> *out_strides = params->out_strides_; | |||
| MS_EXCEPTION_IF_NULL(x); | |||
| MS_EXCEPTION_IF_NULL(indices); | |||
| MS_EXCEPTION_IF_NULL(updates); | |||
| MS_EXCEPTION_IF_NULL(out_strides); | |||
| for (int i = SizeToInt(start); i < SizeToInt(end); ++i) { | |||
| int offset = 0; | |||
| @@ -51,7 +58,7 @@ void Compute(const ComputeParams<T> *params, const size_t start, const size_t en | |||
| void ScatterNdUpdateCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| Check(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto updates_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -93,6 +100,8 @@ void ScatterNdUpdateCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool ScatterNdUpdateCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kScatterNdUpdateInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kScatterNdUpdateOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -136,16 +145,5 @@ void ScatterNdUpdateCPUKernel::LaunchKernel(const std::vector<AddressPtr> &input | |||
| MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret; | |||
| } | |||
| } | |||
| void ScatterNdUpdateCPUKernel::Check(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterNdUpdate needs 3 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterNdUpdate needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -46,11 +46,10 @@ class ScatterNdUpdateCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); | |||
| private: | |||
| void Check(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| int unit_size_{0}; | |||
| size_t num_units_{0}; | |||
| @@ -23,13 +23,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kInputSize = 2; | |||
| constexpr size_t kOutputSize = 1; | |||
| constexpr size_t kSearchSortedInputsNum = 2; | |||
| constexpr size_t kSearchSortedOutputsNum = 1; | |||
| } // namespace | |||
| template <typename S, typename T> | |||
| void SearchSortedCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| right_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "right"); | |||
| sequence_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| values_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| @@ -76,16 +77,8 @@ bool SearchSortedCPUKernel<S, T>::Launch(const std::vector<kernel::AddressPtr> & | |||
| template <typename S, typename T> | |||
| void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| // inputs: sequence, values | |||
| if (inputs.size() != kInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but SearchSorted needs" << kInputSize << " inputs."; | |||
| } | |||
| // outputs: positions | |||
| if (outputs.size() != kOutputSize) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SearchSorted needs " << kOutputSize | |||
| << " outputs"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSearchSortedInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSearchSortedOutputsNum, kernel_name_); | |||
| if (outputs[0]->size / sizeof(T) != inputs[1]->size / sizeof(S)) { | |||
| MS_LOG(EXCEPTION) << "The output dimensions " << outputs[0]->size << " must match the dimensions of input values " | |||
| @@ -39,10 +39,10 @@ class SearchSortedCPUKernel : public CPUKernel { | |||
| void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| bool right_{false}; | |||
| size_t search_len{0}; | |||
| std::vector<size_t> sequence_shape_; | |||
| std::vector<size_t> values_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| size_t search_len{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T_S( | |||
| @@ -19,31 +19,30 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSelectInputsNum = 3; | |||
| constexpr size_t kSelectOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void SelectCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SelectCpuKernel needs 3 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SelectCpuKernel needs 1 output."; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (size_t x : shape) { | |||
| element_num_ *= x; | |||
| } | |||
| return; | |||
| } | |||
| template <typename T> | |||
| bool SelectCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto input_cond = reinterpret_cast<bool *>(inputs[0]->addr); | |||
| auto input_x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto input_y = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSelectInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSelectOutputsNum, kernel_name_); | |||
| auto *input_cond = reinterpret_cast<bool *>(inputs[0]->addr); | |||
| auto *input_x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *input_y = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| for (size_t pos = 0; pos < element_num_; pos++) { | |||
| output[pos] = input_cond[pos] ? input_x[pos] : input_y[pos]; | |||
| } | |||
| @@ -15,42 +15,29 @@ | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/sgd_cpu_kernel.h" | |||
| #include <thread> | |||
| #include <vector> | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kInputSize = 6; | |||
| constexpr size_t kOutputSize = 1; | |||
| constexpr size_t kSGDInputsNum = 6; | |||
| constexpr size_t kSGDOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dampening_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "dampening"); | |||
| weight_decay_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "weight_decay"); | |||
| nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "nesterov"); | |||
| } | |||
| template <typename T> | |||
| void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| // inputs: param, grad, lr, accum, momentum, stat | |||
| if (inputs.size() != kInputSize) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs."; | |||
| } | |||
| // output: output_param | |||
| if (outputs.size() != kOutputSize) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs."; | |||
| } | |||
| } | |||
| template <typename T> | |||
| bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| CheckParam(inputs, outputs); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSGDInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSGDOutputsNum, kernel_name_); | |||
| auto param = reinterpret_cast<T *>(inputs[PARAM]->addr); | |||
| auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr); | |||
| auto lr = reinterpret_cast<T *>(inputs[LR]->addr); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_ | |||
| @@ -35,7 +36,6 @@ class SGDCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| float dampening_{0.0}; | |||
| float weight_decay_{0.0}; | |||
| bool nesterov_{true}; | |||
| @@ -19,9 +19,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSigmoidCrossEntropyWithLogitsInputsNum = 2; | |||
| constexpr size_t kSigmoidCrossEntropyWithLogitsOutputsNum = 1; | |||
| } // namespace | |||
| void SigmoidCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape) { | |||
| @@ -45,12 +50,14 @@ bool SigmoidCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::Ad | |||
| template <typename T> | |||
| void SigmoidCrossEntropyWithLogitsCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto logits_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto labels_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| T zero = (T)0.0; | |||
| T one = (T)1.0; | |||
| T two = (T)2.0; | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSigmoidCrossEntropyWithLogitsInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSigmoidCrossEntropyWithLogitsOutputsNum, kernel_name_); | |||
| auto *logits_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *labels_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto zero = static_cast<T>(0.0); | |||
| auto one = static_cast<T>(1.0); | |||
| auto two = static_cast<T>(2.0); | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| if (logits_addr[i] >= zero) { | |||
| output_addr[i] = static_cast<T>(log1p(static_cast<float>(exp(logits_addr[i] - two * logits_addr[i])))) - | |||
| @@ -60,16 +67,5 @@ void SigmoidCrossEntropyWithLogitsCPUKernel::LaunchKernel(const std::vector<Addr | |||
| } | |||
| } | |||
| } | |||
| void SigmoidCrossEntropyWithLogitsCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel needs 2 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel expects 1 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,11 +35,10 @@ class SigmoidCrossEntropyWithLogitsCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| uint64_t tensor_size_{1}; | |||
| }; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -19,9 +19,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSigmoidCrossEntropyWithLogitsGradInputsNum = 3; | |||
| constexpr size_t kSigmoidCrossEntropyWithLogitsGradOutputsNum = 1; | |||
| } // namespace | |||
| void SigmoidCrossEntropyWithLogitsGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape) { | |||
| @@ -32,6 +37,8 @@ void SigmoidCrossEntropyWithLogitsGradCPUKernel::InitKernel(const CNodePtr &kern | |||
| bool SigmoidCrossEntropyWithLogitsGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSigmoidCrossEntropyWithLogitsGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSigmoidCrossEntropyWithLogitsGradOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { | |||
| @@ -45,12 +52,12 @@ bool SigmoidCrossEntropyWithLogitsGradCPUKernel::Launch(const std::vector<kernel | |||
| template <typename T> | |||
| void SigmoidCrossEntropyWithLogitsGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto logits_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto labels_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto dloss_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| T zero = (T)0.0; | |||
| T one = (T)1.0; | |||
| auto *logits_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *labels_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *dloss_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto zero = static_cast<T>(0.0); | |||
| auto one = static_cast<T>(1.0); | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| if (logits_addr[i] >= zero) { | |||
| output_addr[i] = (one / (one + static_cast<T>(exp(-logits_addr[i]))) - labels_addr[i]) * dloss_addr[i]; | |||
| @@ -60,16 +67,5 @@ void SigmoidCrossEntropyWithLogitsGradCPUKernel::LaunchKernel(const std::vector< | |||
| } | |||
| } | |||
| } | |||
| void SigmoidCrossEntropyWithLogitsGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel needs 2 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "SigmoidCrossEntropyWithLogitsCPUKernel expects 1 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SIGMOID_CROSS_ENTROPY_WITH_LOGITS_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,11 +35,10 @@ class SigmoidCrossEntropyWithLogitsGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| uint64_t tensor_size_{1}; | |||
| }; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -15,15 +15,18 @@ | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/slice_cpu_kernel.h" | |||
| #include <algorithm> | |||
| #include <unordered_map> | |||
| #include "common/thread_pool.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSliceInputsNum = 1; | |||
| constexpr size_t kSliceOutputsNum = 1; | |||
| } // namespace | |||
| int NormalizeBeginPos(int begin_pos, int dim_len) { | |||
| if (begin_pos < 0) { | |||
| int normal_pos = begin_pos + dim_len; | |||
| @@ -34,6 +37,7 @@ int NormalizeBeginPos(int begin_pos, int dim_len) { | |||
| void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| static const std::unordered_map<TypeId, int> type_size_map = {{kNumberTypeBool, sizeof(bool)}, | |||
| {kNumberTypeInt32, sizeof(int)}, | |||
| {kNumberTypeFloat32, sizeof(float)}, | |||
| @@ -84,29 +88,29 @@ void SliceCPUKernel::InitSliceParam(const std::vector<size_t> &input_shape, cons | |||
| slice_param_.param_length_ = DIMENSION_8D; | |||
| } | |||
| void SliceSimpleDim2(const int8_t *input, int8_t *output, SliceParameter *param, int data_size, size_t row_size) { | |||
| size_t copy_size = data_size * param->size_[1]; | |||
| void SliceSimpleDim2(const int8_t *input, int8_t *output, const SliceParameter *param, int data_size, size_t row_size) { | |||
| size_t copy_size = IntToSize(data_size * param->size_[1]); | |||
| for (size_t i = 0; i < row_size; ++i) { | |||
| auto dst = output + data_size * param->size_[1] * i; | |||
| auto src = input + data_size * (param->shape_[1] * i + param->begin_[1]); | |||
| (void)memcpy_s(dst, copy_size, src, copy_size); | |||
| auto ret = memcpy_s(dst, copy_size, src, copy_size); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "Memcpy failed."; | |||
| } | |||
| } | |||
| } | |||
| bool SliceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() != 1 || outputs.size() != 1) { | |||
| MS_LOG(ERROR) << "Slice requires 1 input and 1 output, but got " << inputs.size() << " input and " << outputs.size() | |||
| << " output."; | |||
| return false; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSliceInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSliceOutputsNum, kernel_name_); | |||
| if (outputs[0]->size == 0) { | |||
| MS_LOG(WARNING) << "Slice output memory size should be greater than 0, but got 0."; | |||
| return true; | |||
| } | |||
| auto input_addr = inputs[0]->addr; | |||
| auto output_addr = outputs[0]->addr; | |||
| if (origin_dim_size_ == 2) { | |||
| auto task = [this, &input_addr, &output_addr](size_t start, size_t end) { | |||
| auto src = | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -19,7 +19,6 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| #include "nnacl/base/slice_base.h" | |||
| @@ -39,7 +38,6 @@ class SliceCPUKernel : public CPUKernel { | |||
| private: | |||
| void InitSliceParam(const std::vector<size_t> &input_shape, const std::vector<int64_t> &begin, | |||
| const std::vector<int64_t> &size); | |||
| size_t origin_dim_size_{0}; | |||
| int data_size_{4}; | |||
| SliceParameter slice_param_; | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/slice_grad_cpu_kernel.h" | |||
| #include <algorithm> | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| @@ -20,11 +21,22 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSliceGradInputsNum = 2; | |||
| constexpr size_t kStridedSliceGradInputsNum = 1; | |||
| constexpr size_t kOutputsNum = 1; | |||
| } // namespace | |||
| void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.empty() || input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but SliceGradCPUKernel only supports 1-4D."; | |||
| } | |||
| std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN); | |||
| (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_), | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| @@ -51,6 +63,7 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| } | |||
| FormatArgs(false); | |||
| } | |||
| ExpandAllMemberDims(); | |||
| CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); | |||
| CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); | |||
| @@ -60,10 +73,10 @@ void SliceGradCPUKernel::ExpandAllMemberDims() { | |||
| auto output_len = output_shape_.size(); | |||
| if (output_len < 4) { | |||
| for (size_t i = 0; i < 4 - output_len; ++i) { | |||
| output_shape_.insert(output_shape_.begin(), 1); | |||
| begin_.insert(begin_.begin(), 0); | |||
| strides_.insert(strides_.begin(), 1); | |||
| end_.insert(end_.begin(), 1); | |||
| (void)output_shape_.insert(output_shape_.begin(), 1); | |||
| (void)begin_.insert(begin_.begin(), 0); | |||
| (void)strides_.insert(strides_.begin(), 1); | |||
| (void)end_.insert(end_.begin(), 1); | |||
| } | |||
| } | |||
| for (size_t i = 0; i < 4; ++i) { | |||
| @@ -79,7 +92,12 @@ void SliceGradCPUKernel::ExpandAllMemberDims() { | |||
| bool SliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| bool ret{true}; | |||
| size_t expect_inputs_num = | |||
| kernel_name_ == prim::kPrimSliceGrad->name() ? kSliceGradInputsNum : kStridedSliceGradInputsNum; | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), expect_inputs_num, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_); | |||
| bool ret = true; | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| ret = LaunchKernel<int>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -96,9 +114,9 @@ bool SliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, c | |||
| template <typename T> | |||
| bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| T *input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const std::vector<kernel::AddressPtr> &outputs) const { | |||
| auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size); | |||
| if (ret != EOK) { | |||
| @@ -113,16 +131,17 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp | |||
| size_t out_step_size[3] = {IntToSize(strides_[0]) * output_element_num_[0], | |||
| IntToSize(strides_[1]) * output_element_num_[1], | |||
| IntToSize(strides_[2]) * output_element_num_[2]}; | |||
| auto in_n_offset = 0; | |||
| auto out_n_offset = out_start_offset[0]; | |||
| size_t in_n_offset = 0; | |||
| size_t out_n_offset = out_start_offset[0]; | |||
| size_t input_index = 0; | |||
| for (int i = begin_[0]; stride_signs[0] * i < stride_signs[0] * end_[0]; | |||
| i += strides_[0], in_n_offset += input_element_num_[0], out_n_offset += out_step_size[0]) { | |||
| if (can_copy_memory[0]) { | |||
| CopyDataToOutput<T>(inputs, in_n_offset, outputs, out_n_offset, input_element_num_[0], 0); | |||
| continue; | |||
| } | |||
| auto in_c_offset = 0; | |||
| auto out_c_offset = out_start_offset[1]; | |||
| size_t in_c_offset = 0; | |||
| size_t out_c_offset = out_start_offset[1]; | |||
| for (int j = begin_[1]; stride_signs[1] * j < stride_signs[1] * end_[1]; | |||
| j += strides_[1], in_c_offset += input_element_num_[1], out_c_offset += out_step_size[1]) { | |||
| if (can_copy_memory[1]) { | |||
| @@ -130,8 +149,8 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp | |||
| input_element_num_[1], 1); | |||
| continue; | |||
| } | |||
| auto in_h_offset = 0; | |||
| auto out_h_offset = out_start_offset[2]; | |||
| size_t in_h_offset = 0; | |||
| size_t out_h_offset = out_start_offset[2]; | |||
| for (int k = begin_[2]; stride_signs[2] * k < stride_signs[2] * end_[2]; | |||
| k += strides_[2], in_h_offset += input_element_num_[2], out_h_offset += out_step_size[2]) { | |||
| if (can_copy_memory[2]) { | |||
| @@ -140,7 +159,7 @@ bool SliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inp | |||
| continue; | |||
| } | |||
| for (int m = begin_[3]; stride_signs[3] * m < stride_signs[3] * end_[3]; m += strides_[3]) { | |||
| output_addr[out_n_offset + out_c_offset + out_h_offset + IntToSize(m)] = *input_addr++; | |||
| output_addr[out_n_offset + out_c_offset + out_h_offset + IntToSize(m)] = input_addr[input_index++]; | |||
| } | |||
| } | |||
| } | |||
| @@ -223,19 +242,5 @@ void SliceGradCPUKernel::FormatArgs(bool stride) { | |||
| } | |||
| } | |||
| } | |||
| void SliceGradCPUKernel::CheckParam(const CNodePtr &kernel_node) const { | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SliceGradGpuKernel needs 1 output."; | |||
| } | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but SliceGradGpuKernel only support 4d or lower."; | |||
| } | |||
| if (input_shape.size() == 0) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", scalar is not supported."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -34,16 +36,16 @@ class SliceGradCPUKernel : public CPUKernel { | |||
| private: | |||
| template <typename T> | |||
| bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); | |||
| bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) const; | |||
| template <typename T> | |||
| void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t in_offset, | |||
| const std::vector<kernel::AddressPtr> &outputs, size_t out_offset, size_t copy_num, | |||
| int id) const; | |||
| void ExpandAllMemberDims(); | |||
| bool CanCopyMemoryOnAxis(size_t dim) const; | |||
| int SignOfStride(size_t axis) const; | |||
| void CheckParam(const CNodePtr &kernel_node) const; | |||
| void FormatArgs(bool stride); | |||
| std::vector<int> begin_; | |||
| std::vector<int> end_; | |||
| @@ -19,11 +19,19 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSmoothL1LossInputsNum = 2; | |||
| constexpr size_t kSmoothL1LossOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void SmoothL1LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| beta_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "beta"); | |||
| CheckParam(kernel_node); | |||
| if (beta_ == 0.0) { | |||
| MS_LOG(EXCEPTION) << "Attr beta can not be zero."; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape) { | |||
| tensor_size_ *= d; | |||
| @@ -34,9 +42,11 @@ template <typename T> | |||
| bool SmoothL1LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto predict_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto target_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto result_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSmoothL1LossInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSmoothL1LossOutputsNum, kernel_name_); | |||
| const auto *predict_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| const auto *target_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *result_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| T zero = (T)0.0; | |||
| T half = (T)0.5; | |||
| T beta = (T)beta_; | |||
| @@ -56,20 +66,5 @@ bool SmoothL1LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void SmoothL1LossCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SmoothL1LossCPUKernel needs 2 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SmoothL1LossCPUKernel needs 1 output."; | |||
| } | |||
| if (beta_ == 0.0) { | |||
| MS_LOG(EXCEPTION) << "Attr beta can not be zero."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -37,10 +37,9 @@ class SmoothL1LossCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| float beta_ = 1.0; | |||
| float beta_{1.0}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| uint64_t tensor_size_ = 1; | |||
| uint64_t tensor_size_{1}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T( | |||
| @@ -19,11 +19,19 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSmoothL1LossGradInputsNum = 3; | |||
| constexpr size_t kSmoothL1LossGradOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void SmoothL1LossGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| beta_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "beta"); | |||
| CheckParam(kernel_node); | |||
| if (beta_ == 0.0) { | |||
| MS_LOG(EXCEPTION) << "Attr beta can not be zero."; | |||
| } | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape) { | |||
| tensor_size_ *= d; | |||
| @@ -34,10 +42,12 @@ template <typename T> | |||
| bool SmoothL1LossGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto predict_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto target_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto dloss_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto result_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSmoothL1LossGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSmoothL1LossGradOutputsNum, kernel_name_); | |||
| const auto *predict_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| const auto *target_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| const auto *dloss_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto *result_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| T beta = (T)beta_; | |||
| for (uint64_t i = 0; i < tensor_size_; ++i) { | |||
| T diff = predict_addr[i] - target_addr[i]; | |||
| @@ -51,20 +61,5 @@ bool SmoothL1LossGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> | |||
| } | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void SmoothL1LossGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but SmoothL1LossGradCPUKernel needs 3 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but SmoothL1LossGradCPUKernel needs 1 output."; | |||
| } | |||
| if (beta_ == 0.0) { | |||
| MS_LOG(EXCEPTION) << "Attr beta can not be zero."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -37,7 +37,6 @@ class SmoothL1LossGradCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| float beta_{1.0}; | |||
| uint64_t tensor_size_{1}; | |||
| }; | |||
| @@ -15,27 +15,39 @@ | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/spacetodepth_cpu_kernel.h" | |||
| #include <vector> | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSpaceToDepthInputsNum = 1; | |||
| constexpr size_t kSpaceToDepthOutputsNum = 1; | |||
| constexpr size_t kSpaceToDepthInputShapeSize = 4; | |||
| constexpr size_t kSpaceToDepthMinBlockSize = 2; | |||
| } // namespace | |||
| template <typename T> | |||
| void SpaceToDepthCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| block_size_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size")); | |||
| if (input_shape_.size() != kSpaceToDepthInputShapeSize) { | |||
| MS_LOG(EXCEPTION) << "Input shape must be a 4-D tensor, but got " << input_shape_.size() << "-D"; | |||
| } | |||
| if (block_size_ < kSpaceToDepthMinBlockSize) { | |||
| MS_LOG(EXCEPTION) << "The block size must be >= " << kSpaceToDepthMinBlockSize << ", but got " << block_size_; | |||
| } | |||
| } | |||
| template <typename T> | |||
| bool SpaceToDepthCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /* workspace */, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSpaceToDepthInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSpaceToDepthOutputsNum, kernel_name_); | |||
| auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t size = inputs[0]->size / sizeof(T); | |||
| @@ -75,17 +87,5 @@ bool SpaceToDepthCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp | |||
| CPUKernelUtils::ParallelFor(task, size); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void SpaceToDepthCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but DepthToSpaceCPUKerrnel needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but DepthToSpaceCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -13,11 +13,12 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPACETODEPTH_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPACETODEPTH_CPU_KERNEL_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| @@ -33,10 +34,9 @@ class SpaceToDepthCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| size_t block_size_{0}; | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| size_t block_size_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T( | |||
| @@ -21,7 +21,8 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSparseApplyAdamInputSize = 11; | |||
| constexpr size_t kSparseApplyAdamInputsNum = 11; | |||
| constexpr size_t kSparseApplyAdamWorkspaceSize = 5; | |||
| template <typename T> | |||
| void ComputeAdam(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) { | |||
| @@ -100,6 +101,7 @@ void SparseApplyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) | |||
| void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| std::vector<size_t> v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -140,9 +142,9 @@ void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const { | |||
| auto var = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto m = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto v = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto *var = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto *m = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto *v = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0]; | |||
| if (beta1_power == 1) { | |||
| MS_LOG(EXCEPTION) << "The beta1_power should not be 1"; | |||
| @@ -152,13 +154,13 @@ void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr | |||
| auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0]; | |||
| auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0]; | |||
| auto epsilon = reinterpret_cast<float *>(inputs[8]->addr)[0]; | |||
| auto grad = reinterpret_cast<float *>(inputs[9]->addr); | |||
| auto indices = reinterpret_cast<T *>(inputs[10]->addr); | |||
| auto new_grad = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto new_indices = reinterpret_cast<T *>(workspace[1]->addr); | |||
| auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr); | |||
| auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr); | |||
| auto m_t = reinterpret_cast<float *>(workspace[4]->addr); | |||
| auto *grad = reinterpret_cast<float *>(inputs[9]->addr); | |||
| auto *indices = reinterpret_cast<T *>(inputs[10]->addr); | |||
| auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr); | |||
| auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr); | |||
| auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr); | |||
| auto *m_t = reinterpret_cast<float *>(workspace[4]->addr); | |||
| SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_}); | |||
| SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_}); | |||
| @@ -180,7 +182,6 @@ void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr | |||
| input_params.beta1_ = beta1; | |||
| input_params.beta2_ = beta2; | |||
| MultiThreadCompute<T>(ComputeMomentum<T>, &input_params, total_dim_size); | |||
| input_params.m_t_ = m_t; | |||
| input_params.use_nesterov_ = use_nesterov_; | |||
| input_params.sparse_grad_ = unique_sparse_grad; | |||
| @@ -200,9 +201,8 @@ void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr | |||
| bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| if (inputs.size() < kSparseApplyAdamInputSize) { | |||
| MS_LOG(EXCEPTION) << "Error input size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyAdamInputsNum, kernel_name_); | |||
| CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyAdamWorkspaceSize, kernel_name_); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, workspace); | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_CPU_KERNEL_H_ | |||
| @@ -27,17 +28,21 @@ class SparseApplyAdamCPUKernel : public SparseOptimizerCPUKernel { | |||
| ~SparseApplyAdamCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| bool use_nesterov_{false}; | |||
| private: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| template <typename T> | |||
| void InitWorkspaceSize(); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const; | |||
| protected: | |||
| bool use_nesterov_{false}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(FusedSparseAdam, | |||
| @@ -21,7 +21,9 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSparseApplyFtrlInputSize = 5; | |||
| constexpr size_t kSparseApplyFtrlInputsNum = 5; | |||
| constexpr size_t kSparseApplyFtrlWorkspaceSize = 4; | |||
| template <typename T> | |||
| void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) { | |||
| MS_EXCEPTION_IF_NULL(input_params); | |||
| @@ -74,8 +76,10 @@ void SparseApplyFtrlCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| InitWorkspaceSize<int>(); | |||
| } else { | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| InitWorkspaceSize<int64_t>(); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported"; | |||
| } | |||
| } | |||
| @@ -135,15 +139,15 @@ void SparseApplyFtrlCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SparseApplyFtrlCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const { | |||
| auto var = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto accum = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto linear = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto grad = reinterpret_cast<float *>(inputs[3]->addr); | |||
| auto indices = reinterpret_cast<T *>(inputs[4]->addr); | |||
| auto new_grad = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto new_indices = reinterpret_cast<T *>(workspace[1]->addr); | |||
| auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr); | |||
| auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr); | |||
| auto *var = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto *accum = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto *linear = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto *grad = reinterpret_cast<float *>(inputs[3]->addr); | |||
| auto *indices = reinterpret_cast<T *>(inputs[4]->addr); | |||
| auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr); | |||
| auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr); | |||
| auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr); | |||
| SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_}); | |||
| SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_}); | |||
| @@ -173,10 +177,8 @@ void SparseApplyFtrlCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr | |||
| bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| if (inputs.size() < kSparseApplyFtrlInputSize) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyFtrlInputsNum, kernel_name_); | |||
| CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyFtrlWorkspaceSize, kernel_name_); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, workspace); | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_FTRL_CPU_KERNEL_H_ | |||
| @@ -27,20 +28,24 @@ class SparseApplyFtrlCPUKernel : public SparseOptimizerCPUKernel { | |||
| ~SparseApplyFtrlCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| float lr_{0.0}; | |||
| float l1_{0.0}; | |||
| float l2_{0.0}; | |||
| float lr_power_{0.0}; | |||
| private: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| template <typename T> | |||
| void InitWorkspaceSize(); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const; | |||
| protected: | |||
| float lr_{0}; | |||
| float l1_{0}; | |||
| float l2_{0}; | |||
| float lr_power_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(FusedSparseFtrl, | |||
| @@ -21,7 +21,8 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSparseApplyLazyAdamInputSize = 11; | |||
| constexpr size_t kSparseApplyLazyAdamInputsNum = 11; | |||
| constexpr size_t kSparseApplyLazyAdamWorkspaceSize = 4; | |||
| template <typename T> | |||
| void ComputeLazyAdam(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) { | |||
| @@ -70,13 +71,16 @@ void SparseApplyLazyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_no | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| InitWorkspaceSize<int>(); | |||
| } else { | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| InitWorkspaceSize<int64_t>(); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported"; | |||
| } | |||
| } | |||
| void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| std::vector<size_t> v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -103,14 +107,14 @@ void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| var_outer_dim_size_ *= var_shape[i]; | |||
| } | |||
| if (indices_shape.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "Indices must be 1D!"; | |||
| MS_LOG(EXCEPTION) << "Indices must be 1D"; | |||
| } | |||
| indices_size_ = indices_shape[0]; | |||
| if (grad_shape[0] != indices_size_) { | |||
| MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices"; | |||
| } | |||
| if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) { | |||
| use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov"); | |||
| use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, USE_NESTEROV); | |||
| } | |||
| indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 10); | |||
| } | |||
| @@ -118,9 +122,9 @@ void SparseApplyLazyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| void SparseApplyLazyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const { | |||
| auto var = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto m = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto v = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto *var = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto *m = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto *v = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0]; | |||
| if (beta1_power == 1) { | |||
| MS_LOG(EXCEPTION) << "The beta1_power should not be 1"; | |||
| @@ -130,12 +134,12 @@ void SparseApplyLazyAdamCPUKernel::LaunchKernel(const std::vector<kernel::Addres | |||
| auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0]; | |||
| auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0]; | |||
| auto epsilon = reinterpret_cast<float *>(inputs[8]->addr)[0]; | |||
| auto grad = reinterpret_cast<float *>(inputs[9]->addr); | |||
| auto indices = reinterpret_cast<T *>(inputs[10]->addr); | |||
| auto new_grad = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto new_indices = reinterpret_cast<T *>(workspace[1]->addr); | |||
| auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr); | |||
| auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr); | |||
| auto *grad = reinterpret_cast<float *>(inputs[9]->addr); | |||
| auto *indices = reinterpret_cast<T *>(inputs[10]->addr); | |||
| auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr); | |||
| auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr); | |||
| auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr); | |||
| SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_}); | |||
| SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_}); | |||
| @@ -167,10 +171,8 @@ void SparseApplyLazyAdamCPUKernel::LaunchKernel(const std::vector<kernel::Addres | |||
| bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| if (inputs.size() < kSparseApplyLazyAdamInputSize) { | |||
| MS_LOG(EXCEPTION) << "Error input size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyLazyAdamInputsNum, kernel_name_); | |||
| CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyLazyAdamWorkspaceSize, kernel_name_); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, workspace); | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_CPU_KERNEL_H_ | |||
| @@ -27,17 +28,20 @@ class SparseApplyLazyAdamCPUKernel : public SparseOptimizerCPUKernel { | |||
| ~SparseApplyLazyAdamCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| bool use_nesterov_{false}; | |||
| private: | |||
| template <typename T> | |||
| void InitWorkspaceSize(); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const; | |||
| protected: | |||
| bool use_nesterov_{false}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(FusedSparseLazyAdam, | |||
| @@ -21,7 +21,8 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSparseApplyProximalAdagradInputSize = 7; | |||
| constexpr size_t kSparseApplyProximalAdagradInputsNum = 7; | |||
| constexpr size_t kSparseApplyProximalAdagradWorkspaceSize = 4; | |||
| template <typename T> | |||
| void ComputeProximalAdagrad(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) { | |||
| @@ -70,13 +71,16 @@ void SparseApplyProximalAdagradCPUKernel::InitInputOutputSize(const CNodePtr &ke | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| InitWorkspaceSize<int>(); | |||
| } else { | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| InitWorkspaceSize<int64_t>(); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported"; | |||
| } | |||
| } | |||
| void SparseApplyProximalAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| std::vector<size_t> accum_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| std::vector<size_t> lr_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -160,9 +164,8 @@ void SparseApplyProximalAdagradCPUKernel::LaunchKernel(const std::vector<kernel: | |||
| bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &) { | |||
| if (inputs.size() < kSparseApplyProximalAdagradInputSize) { | |||
| MS_LOG(EXCEPTION) << "Wrong input size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyProximalAdagradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyProximalAdagradWorkspaceSize, kernel_name_); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, workspace); | |||
| } else if (indices_data_type_ == kNumberTypeInt64) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_PROXIMAL_ADAGRAD_CPU_KERNEL_H_ | |||
| @@ -27,11 +28,16 @@ class SparseApplyProximalAdagradCPUKernel : public SparseOptimizerCPUKernel { | |||
| ~SparseApplyProximalAdagradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| private: | |||
| template <typename T> | |||
| void InitWorkspaceSize(); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace) const; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_OPTIMIZER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_OPTIMIZER_CPU_KERNEL_H_ | |||
| @@ -63,6 +64,7 @@ struct MultiThreadComputeParams { | |||
| size_t var_outer_dim_size_{0}; | |||
| bool use_nesterov_; | |||
| }; | |||
| template <typename T> | |||
| using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams<T> *param, size_t start, size_t end)>; | |||
| @@ -205,7 +207,7 @@ class SparseOptimizerCPUKernel : public CPUKernel { | |||
| MS_LOG(DEBUG) << "Start"; | |||
| MS_EXCEPTION_IF_NULL(segment); | |||
| MS_EXCEPTION_IF_NULL(segment->indices_); | |||
| if (param.thread_num_ < 1) { | |||
| if (param.thread_num_ == 0) { | |||
| MS_EXCEPTION(ArgumentError) << "Input param thread num must > 0!"; | |||
| } | |||
| std::vector<size_t> bucket_data_num(param.thread_num_, 0); | |||
| @@ -20,12 +20,18 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSparseTensorDenseMatmulInputsNum = 4; | |||
| constexpr size_t kSparseTensorDenseMatmulOutputsNum = 1; | |||
| constexpr size_t kSparseTensorDenseMatmulOutputShapeSize = 2; | |||
| constexpr size_t kSparseTensorDenseMatmulDenseShapeSize = 2; | |||
| constexpr size_t kIndicesSizeNum = 2; | |||
| constexpr size_t kIndices2rdDimNum = 2; | |||
| } // namespace | |||
| template <typename I, typename T> | |||
| void SparseTensorDenseMatmulCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| adj_st_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST); | |||
| adj_dt_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES); | |||
| @@ -59,11 +65,8 @@ template <typename I, typename T> | |||
| bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /* workspace */, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() != kInputNum || outputs.size() != kOutputNum) { | |||
| MS_LOG(ERROR) << "SparseTensorDenseMatmul requires 4 inputs and 1 output, but got " << inputs.size() | |||
| << " inputs and " << outputs.size() << " output."; | |||
| return false; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseTensorDenseMatmulInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseTensorDenseMatmulOutputsNum, kernel_name_); | |||
| if (outputs[0]->size == 0) { | |||
| MS_LOG(WARNING) << "SparseTensorDenseMatmul output memory size should be greater than 0, but got 0."; | |||
| return true; | |||
| @@ -72,13 +75,16 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad | |||
| MS_LOG(EXCEPTION) << "SparseTensorDenseMatmul memset output failed!"; | |||
| } | |||
| const size_t b_index = 3; | |||
| const auto *a_indices = reinterpret_cast<I *>(inputs[0]->addr); | |||
| const auto *a_values = reinterpret_cast<T *>(inputs[1]->addr); | |||
| const auto *b = reinterpret_cast<T *>(inputs[3]->addr); | |||
| const auto *b = reinterpret_cast<T *>(inputs[b_index]->addr); | |||
| auto *out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const size_t indices_length = inputs[0]->size / sizeof(I); | |||
| const size_t values_length = inputs[1]->size / sizeof(T); | |||
| const size_t b_length = inputs[3]->size / sizeof(T); | |||
| const size_t b_length = inputs[b_index]->size / sizeof(T); | |||
| const size_t dim_num = 2; | |||
| const size_t out_dim_0 = output_shape_[0]; | |||
| const size_t out_dim_1 = output_shape_[1]; | |||
| const size_t b_dim_0 = b_shape_[0]; | |||
| @@ -86,14 +92,14 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad | |||
| const size_t same_dim = adj_dt_ ? b_dim_1 : b_dim_0; | |||
| for (size_t i = 0; i < values_size_; ++i) { | |||
| if (i * 2 + 1 >= indices_length) { // the interval is 2 | |||
| if (i * dim_num + 1 >= indices_length) { | |||
| MS_LOG(EXCEPTION) << "The index of a_indices out of bounds."; | |||
| } | |||
| if (i >= values_length) { | |||
| MS_LOG(EXCEPTION) << "The index of a_values out of bounds."; | |||
| } | |||
| const int row = adj_st_ ? a_indices[i * 2 + 1] : a_indices[i * 2]; | |||
| const int col = adj_st_ ? a_indices[i * 2] : a_indices[i * 2 + 1]; | |||
| const int row = adj_st_ ? a_indices[i * dim_num + 1] : a_indices[i * dim_num]; | |||
| const int col = adj_st_ ? a_indices[i * dim_num] : a_indices[i * dim_num + 1]; | |||
| if (row >= SizeToInt(out_dim_0) || row < 0 || col >= SizeToInt(same_dim) || col < 0) { | |||
| MS_EXCEPTION(ValueError) << "The indices including out of bounds index, row range: [0, " << out_dim_0 | |||
| << "), col range: [0, " << same_dim << "), but got row: " << row << ", col: " << col; | |||
| @@ -23,10 +23,6 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr size_t kInputNum = 4; | |||
| constexpr size_t kOutputNum = 1; | |||
| constexpr size_t kIndicesSizeNum = 2; | |||
| constexpr size_t kIndices2rdDimNum = 2; | |||
| template <typename I, typename T> | |||
| class SparseTensorDenseMatmulCPUKernel : public CPUKernel { | |||
| public: | |||
| @@ -22,12 +22,14 @@ namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kIndicesShapeSize = 2; | |||
| constexpr size_t kSparseToDenseInputsNum = 3; | |||
| constexpr size_t kSparseToDenseOutputsNum = 1; | |||
| } // namespace | |||
| template <typename I, typename T> | |||
| void SparseToDenseCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (indices_shape.size() != kIndicesShapeSize) { | |||
| MS_LOG(EXCEPTION) << "SparseToDense requires 'indices' should be a " << kIndicesShapeSize << "-D Tensor, but got " | |||
| @@ -48,11 +50,8 @@ template <typename I, typename T> | |||
| bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() != 3 || outputs.size() != 1) { | |||
| MS_LOG(ERROR) << "SparseToDense requires 3 inputs and 1 output, but got " << inputs.size() << " inputs and " | |||
| << outputs.size() << " output."; | |||
| return false; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseToDenseInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseToDenseOutputsNum, kernel_name_); | |||
| if (outputs[0]->size == 0) { | |||
| MS_LOG(WARNING) << "SparseToDense output memory size should be greater than 0, but got 0."; | |||
| return true; | |||
| @@ -92,17 +91,5 @@ bool SparseToDenseCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> | |||
| } | |||
| return true; | |||
| } | |||
| template <typename I, typename T> | |||
| void SparseToDenseCPUKernel<I, T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "SparseToDense needs 3 inputs, but got " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "SparseToDense should have 2 outputs, but got " << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -37,7 +37,6 @@ class SparseToDenseCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> output_shape_; | |||
| size_t values_size_{0}; | |||
| }; | |||
| @@ -21,11 +21,16 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSplitInputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void SplitCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis"); | |||
| output_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "output_num"); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS); | |||
| output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "output_num")); | |||
| if (output_num_ == 0) { | |||
| MS_LOG(EXCEPTION) << "Attr output_num is equal to 0"; | |||
| } | |||
| @@ -49,6 +54,8 @@ template <typename T> | |||
| bool SplitCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSplitInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), output_num_, kernel_name_); | |||
| LaunchKernel(inputs, workspace, outputs); | |||
| return true; | |||
| } | |||
| @@ -56,7 +63,7 @@ bool SplitCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| template <typename T> | |||
| void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t /* size */) { | |||
| SplitParameter param; | |||
| param.num_split_ = LongToInt(output_num_); | |||
| param.num_split_ = SizeToInt(output_num_); | |||
| param.split_dim_ = LongToInt(axis_); | |||
| param.strides_[input_shape_.size() - 1] = 1; | |||
| for (int i = SizeToInt(input_shape_.size()) - 2; i >= 0; i--) { // from -2 to 0 dim | |||
| @@ -64,7 +71,7 @@ void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t /* size */) { | |||
| } | |||
| auto split_sizes = std::make_unique<int[]>(IntToSize(param.num_split_)); | |||
| param.split_sizes_ = split_sizes.get(); | |||
| int split_size = input_shape_[param.split_dim_] / output_num_; | |||
| int split_size = input_shape_[param.split_dim_] / SizeToInt(output_num_); | |||
| for (int i = 0; i < param.num_split_; i++) { | |||
| param.split_sizes_[i] = split_size; | |||
| } | |||
| @@ -96,13 +103,7 @@ void SplitCPUKernel<T>::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| template <typename T> | |||
| void SplitCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| auto input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| int64_t dims = SizeToLong(input_shape_.size()); | |||
| int64_t output_num = SizeToLong(AnfAlgo::GetOutputTensorNum(kernel_node)); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Split needs 1 input."; | |||
| } | |||
| if (dims == 0 || dims > SPLIT_STRIDES_SIZE) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << dims << ", scalar is not supported."; | |||
| } | |||
| @@ -110,14 +111,11 @@ void SplitCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| MS_LOG(EXCEPTION) << "Attr axis_ " << axis_ << " must be in " << -dims << "~" << dims; | |||
| } | |||
| if (axis_ < 0) { | |||
| axis_ += SizeToInt(input_shape_.size()); | |||
| axis_ += SizeToLong(input_shape_.size()); | |||
| } | |||
| if (output_num_ > IntToLong(input_shape_[LongToUlong(axis_)])) { | |||
| if (output_num_ > IntToSize(input_shape_[LongToUlong(axis_)])) { | |||
| MS_LOG(EXCEPTION) << "Attr output_num " << output_num_ << " must less than " << input_shape_[axis_]; | |||
| } | |||
| if (output_num_ != output_num) { | |||
| MS_LOG(EXCEPTION) << "Output num is " << output_num << ", but need " << output_num_; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -37,25 +37,19 @@ class SplitCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs); | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| void LaunchSplit(T *input, T **output, size_t size); | |||
| int64_t axis_{1}; | |||
| int64_t output_num_{1}; | |||
| int64_t axis_step_{1}; | |||
| size_t input_size_{1}; | |||
| size_t dims_after_axis_{1}; | |||
| size_t dims_current_after_axis_{1}; | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs); | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| std::vector<std::vector<size_t>> output_shape_list_; | |||
| int64_t axis_{0}; | |||
| size_t output_num_{1}; | |||
| std::vector<int> input_shape_; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(Split, KernelAttr(), SplitCPUKernel, float); | |||
| @@ -24,21 +24,25 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kStridedSliceInputsNum = 1; | |||
| constexpr size_t kStridedSliceOutputsNum = 1; | |||
| } // namespace | |||
| enum PosType { kBegin, kEnd }; | |||
| int NormalizePos(int pos, int dim_len, PosType pos_type) { | |||
| if (pos < 0) { | |||
| int normal_pos = pos + dim_len; | |||
| int threshold = pos_type == kBegin ? 0 : -1; | |||
| normal_pos = std::max(normal_pos, threshold); | |||
| return normal_pos; | |||
| if (pos >= 0) { | |||
| int max_pos = pos_type == kBegin ? dim_len - 1 : dim_len; | |||
| return std::min(pos, max_pos); | |||
| } | |||
| int max_pos = pos_type == kBegin ? dim_len - 1 : dim_len; | |||
| return std::min(pos, max_pos); | |||
| int min_pos = pos_type == kBegin ? 0 : -1; | |||
| return std::max(pos + dim_len, min_pos); | |||
| } | |||
| void StridedSliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (input_shape_.size() > DIMENSION_8D || input_shape_.empty()) { | |||
| @@ -70,18 +74,17 @@ bool StridedSliceCPUKernel::MatchParallelPattern() { | |||
| // Example 2: | |||
| // input shape info: [1, 46, 40] | |||
| // output shape info: [1, 20, 40] | |||
| if (input_shape_.size() != output_shape_.size()) { | |||
| return false; | |||
| } | |||
| std::vector<int> axis_list; | |||
| for (size_t i = 0; i < input_shape_.size(); ++i) { | |||
| if (input_shape_[i] != output_shape_[i]) { | |||
| (void)axis_list.emplace_back(i); | |||
| if (input_shape_.size() == output_shape_.size()) { | |||
| std::vector<int> axis_list; | |||
| for (size_t i = 0; i < input_shape_.size(); ++i) { | |||
| if (input_shape_[i] != output_shape_[i]) { | |||
| (void)axis_list.emplace_back(i); | |||
| } | |||
| } | |||
| if (axis_list.size() == 1) { | |||
| split_axis_ = axis_list.front(); | |||
| return true; | |||
| } | |||
| } | |||
| if (axis_list.size() == 1) { | |||
| split_axis_ = axis_list.front(); | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| @@ -123,8 +126,9 @@ void StridedSliceCPUKernel::InitSliceParam(const std::vector<int64_t> &begin, co | |||
| slice_param_.data_type = type_pair->second.first; | |||
| for (size_t i = 0; i < DIMENSION_8D; i++) { | |||
| int dim_len; | |||
| if (i < begin.size()) { | |||
| int dim_len = SizeToInt(input_shape_[i]); | |||
| dim_len = SizeToInt(input_shape_[i]); | |||
| int begin_pos = LongToInt(begin[i]); | |||
| int end_pos = LongToInt(end[i]); | |||
| int stride_size = LongToInt(stride[i]); | |||
| @@ -142,7 +146,7 @@ void StridedSliceCPUKernel::InitSliceParam(const std::vector<int64_t> &begin, co | |||
| slice_param_.ends_[i] = slice_param_.begins_[i] - 1; | |||
| } | |||
| } else if (i < input_shape_.size()) { | |||
| int dim_len = SizeToInt(input_shape_[i]); | |||
| dim_len = SizeToInt(input_shape_[i]); | |||
| slice_param_.in_shape_[i] = dim_len; | |||
| slice_param_.begins_[i] = 0; | |||
| slice_param_.ends_[i] = dim_len; | |||
| @@ -158,10 +162,10 @@ void StridedSliceCPUKernel::InitSliceParam(const std::vector<int64_t> &begin, co | |||
| slice_param_.num_axes_ = DIMENSION_8D; | |||
| } | |||
| int StridedSliceCPUKernel::RunTaskOnOuter(uint8_t *input_addr, uint8_t *output_addr, int start_pos) { | |||
| int StridedSliceCPUKernel::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) { | |||
| int begin_index = slice_param_.begins_[split_axis_]; | |||
| int inner_size = inner_ * data_size_; | |||
| uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size; | |||
| const uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size; | |||
| uint8_t *cur_out_ptr = output_addr + start_pos * output_shape_[split_axis_] * inner_size; | |||
| int cur_outer = outer_ - start_pos; | |||
| if (cur_outer <= 0) { | |||
| @@ -173,10 +177,10 @@ int StridedSliceCPUKernel::RunTaskOnOuter(uint8_t *input_addr, uint8_t *output_a | |||
| return common::SUCCESS; | |||
| } | |||
| int StridedSliceCPUKernel::RunTaskOnSplitAxis(uint8_t *input_addr, uint8_t *output_addr, int start_pos) { | |||
| int StridedSliceCPUKernel::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) { | |||
| int begin_index = slice_param_.begins_[split_axis_]; | |||
| int inner_size = inner_ * data_size_; | |||
| uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size; | |||
| const uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size; | |||
| uint8_t *cur_out_ptr = output_addr + start_pos * inner_size; | |||
| int cal_axis_num = output_shape_[split_axis_] - start_pos; | |||
| if (cal_axis_num <= 0) { | |||
| @@ -187,10 +191,10 @@ int StridedSliceCPUKernel::RunTaskOnSplitAxis(uint8_t *input_addr, uint8_t *outp | |||
| return common::SUCCESS; | |||
| } | |||
| void StridedSliceCPUKernel::ParallelRun(uint8_t *input_addr, uint8_t *output_addr, int thread_num) { | |||
| void StridedSliceCPUKernel::ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num) { | |||
| int thread_index = 0; | |||
| std::vector<common::Task> tasks; | |||
| std::function<int(StridedSliceCPUKernel *, uint8_t *, uint8_t *, int)> execute_func; | |||
| std::function<int(StridedSliceCPUKernel *, const uint8_t *, uint8_t *, int)> execute_func; | |||
| if (parallel_strategy_ == kOnOuter) { | |||
| execute_func = &StridedSliceCPUKernel::RunTaskOnOuter; | |||
| } else if (parallel_strategy_ == kOnSplitAxis) { | |||
| @@ -208,13 +212,10 @@ void StridedSliceCPUKernel::ParallelRun(uint8_t *input_addr, uint8_t *output_add | |||
| } | |||
| bool StridedSliceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> & /* workspace */, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() != 1 || outputs.size() != 1) { | |||
| MS_LOG(ERROR) << "StridedSlice requires 1 input and 1 output, but got " << inputs.size() << " input and " | |||
| << outputs.size() << " output."; | |||
| return false; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kStridedSliceInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kStridedSliceOutputsNum, kernel_name_); | |||
| if (outputs[0]->size == 0) { | |||
| MS_LOG(WARNING) << "StridedSlice output memory size should be greater than 0, but got 0."; | |||
| return true; | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_STRIDESLICE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_STRIDESLICE_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| @@ -37,14 +37,13 @@ class StridedSliceCPUKernel : public CPUKernel { | |||
| private: | |||
| enum ParallelStrategy { kOnSplitAxis, kOnOuter }; | |||
| void InitSliceParam(const std::vector<int64_t> &begin, const std::vector<int64_t> &end, | |||
| const std::vector<int64_t> &stride); | |||
| bool MatchParallelPattern(); | |||
| void InitParallelParam(); | |||
| void ParallelRun(uint8_t *input_addr, uint8_t *output_addr, int thread_num); | |||
| int RunTaskOnOuter(uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| int RunTaskOnSplitAxis(uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| void ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num); | |||
| int RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| int RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| TypeId dtype_; | |||
| int data_size_{4}; | |||
| @@ -70,4 +69,4 @@ MS_REG_CPU_KERNEL(StridedSlice, KernelAttr().AddInputAttr(kNumberTypeFloat64).Ad | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SLICE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_STRIDESLICE_CPU_KERNEL_H_ | |||
| @@ -20,8 +20,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSubAndFilterInputsNum = 3; | |||
| constexpr size_t kSubAndFilterOutputNum = 2; | |||
| } // namespace | |||
| void SubAndFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| node_wpt_ = kernel_node; | |||
| input_x_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| } | |||
| @@ -29,6 +35,8 @@ void SubAndFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool SubAndFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSubAndFilterInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSubAndFilterOutputNum, kernel_name_); | |||
| if (input_x_dtype_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else if (input_x_dtype_ == kNumberTypeInt64) { | |||
| @@ -42,11 +50,9 @@ bool SubAndFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs | |||
| template <typename T> | |||
| void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto node_ = node_wpt_.lock(); | |||
| if (!node_) { | |||
| MS_LOG(EXCEPTION) << "node_wpt_ is expired."; | |||
| } | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); | |||
| auto node = node_wpt_.lock(); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); | |||
| batch_size_ = 1; | |||
| for (size_t i = 0; i < indices_shape.size(); ++i) { | |||
| @@ -71,12 +77,12 @@ void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| MS_LOG(INFO) << "SubAndFilter output count is " << count; | |||
| std::vector<size_t> out_shape; | |||
| (void)out_shape.emplace_back(count); | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(node_); | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(node); | |||
| std::vector<TypeId> dtypes(output_num); | |||
| for (size_t i = 0; i < output_num; i++) { | |||
| dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i); | |||
| dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node, i); | |||
| } | |||
| AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node_.get()); | |||
| AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node.get()); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -35,10 +35,10 @@ class SubAndFilterCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); | |||
| private: | |||
| size_t batch_size_{1}; | |||
| TypeId input_x_dtype_{kTypeUnknown}; | |||
| CNodeWeakPtr node_wpt_; | |||
| @@ -23,8 +23,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kTensorCopySlicesInputsNum = 2; | |||
| constexpr size_t kTensorCopySlicesOutputsNum = 1; | |||
| } // namespace | |||
| void TensorCopySlicesCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| @@ -48,11 +54,8 @@ void TensorCopySlicesCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool TensorCopySlicesCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /* workspace */, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() != 2 || outputs.size() != 1) { | |||
| MS_LOG(ERROR) << "TensorCopySlices requires 1 input and 1 output, but got " << inputs.size() << " input and " | |||
| << outputs.size() << " output."; | |||
| return false; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTensorCopySlicesInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTensorCopySlicesOutputsNum, kernel_name_); | |||
| auto input_addr = reinterpret_cast<uint8_t *>(inputs[0]->addr); | |||
| auto update_addr = reinterpret_cast<uint8_t *>(inputs[1]->addr); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -20,9 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kTensorAddInputsSize = 2; | |||
| constexpr size_t kTensorAddOutputsSize = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void TensorAddCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| // Init shape ans strides | |||
| input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| @@ -33,6 +39,8 @@ template <typename T> | |||
| bool TensorAddCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTensorAddInputsSize, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTensorAddOutputsSize, kernel_name_); | |||
| T *input_addr_a = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input_addr_b = reinterpret_cast<T *>(inputs[1]->addr); | |||
| T *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -20,10 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void TileCPUKernel::TileMultipleCompute(void) { | |||
| namespace { | |||
| constexpr size_t kTileInputsNum = 1; | |||
| constexpr size_t kTileOutputsNum = 1; | |||
| } // namespace | |||
| void TileCPUKernel::TileMultipleCompute() { | |||
| int large_one_multiple_count_ = 0; | |||
| int multiple = 0; | |||
| int mul_index = 0; | |||
| size_t mul_index = 0; | |||
| for (size_t i = 0; i < multiples_.size(); i++) { | |||
| tile_parameter_.multiples_[i] = multiples_[i]; | |||
| if (tile_parameter_.multiples_[i] > 1) { | |||
| @@ -47,6 +52,10 @@ void TileCPUKernel::TileMultipleCompute(void) { | |||
| void TileCPUKernel::TileTensorParamrInit(const CNodePtr &kernel_node) { | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| y_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (x_shape_.size() > MAX_TILE_DIM_SIZE || x_shape_.size() > y_shape_.size()) { | |||
| MS_LOG(EXCEPTION) << "Tile input shape should not be greater than default max size :" << MAX_TILE_DIM_SIZE | |||
| << " and output shape : " << y_shape_.size() << ", but got input shape " << x_shape_.size(); | |||
| } | |||
| std::vector<int64_t> multiples_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "multiples"); | |||
| (void)std::transform(multiples_me.begin(), multiples_me.end(), std::back_inserter(multiples_), | |||
| [](const int64_t &value) { return LongToInt(value); }); | |||
| @@ -54,17 +63,9 @@ void TileCPUKernel::TileTensorParamrInit(const CNodePtr &kernel_node) { | |||
| size_t ones = multiples_.size() - x_shape_.size(); | |||
| if (ones > 0) { | |||
| for (size_t i = 0; i < ones; ++i) { | |||
| x_shape_.insert(x_shape_.begin(), 1); | |||
| (void)x_shape_.insert(x_shape_.begin(), 1); | |||
| } | |||
| } | |||
| if (x_shape_.size() > MAX_TILE_DIM_SIZE) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should not greater than " << MAX_TILE_DIM_SIZE << ", but got " | |||
| << x_shape_.size(); | |||
| } | |||
| if (y_shape_.size() < x_shape_.size()) { | |||
| MS_LOG(EXCEPTION) << "Output shape size should not less than input shape size, but got output shape: " << y_shape_ | |||
| << ", input shape: " << x_shape_; | |||
| } | |||
| input_size_ = 1; | |||
| tile_parameter_.in_dim_ = x_shape_.size(); | |||
| @@ -88,7 +89,7 @@ void TileCPUKernel::TileTensorParamrInit(const CNodePtr &kernel_node) { | |||
| void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| CheckParam(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| TileTensorParamrInit(kernel_node); | |||
| launch_map_[kNumberTypeInt8] = &TileCPUKernel::LaunchKernel<int8_t>; | |||
| @@ -112,6 +113,8 @@ void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool TileCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTileInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTileOutputsNum, kernel_name_); | |||
| launch_func_(this, inputs, outputs); | |||
| return true; | |||
| } | |||
| @@ -132,16 +135,5 @@ void TileCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st | |||
| Tile(x_addr, y_addr, &tile_parameter_); | |||
| } | |||
| void TileCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but TileCPUKernel needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but TileCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -36,6 +36,7 @@ class TileCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| @@ -43,8 +44,6 @@ class TileCPUKernel : public CPUKernel { | |||
| void TileMultipleCompute(void); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| std::vector<size_t> y_shape_; | |||
| std::vector<int> multiples_; | |||
| @@ -54,8 +53,8 @@ class TileCPUKernel : public CPUKernel { | |||
| std::unordered_map<TypeId, TypeKernel> launch_map_; | |||
| TypeKernel launch_func_; | |||
| TileParameter tile_parameter_; | |||
| bool one_dim_tile_; | |||
| size_t input_size_; | |||
| bool one_dim_tile_{false}; | |||
| size_t input_size_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(Tile, KernelAttr(), TileCPUKernel); | |||
| @@ -21,6 +21,11 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kTopKInputsNum = 2; | |||
| constexpr size_t kTopKOutputsNum = 2; | |||
| } // namespace | |||
| template <typename T> | |||
| void TopKCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| @@ -87,8 +92,8 @@ void TopKCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st | |||
| void TopKCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| auto x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (x_shape_.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "Input shape size should not less than 1"; | |||
| if (x_shape_.empty()) { | |||
| MS_LOG(EXCEPTION) << "Input shape is empty"; | |||
| } | |||
| for (size_t i = 0; i < x_shape_.size() - 1; ++i) { | |||
| outer_size_ *= x_shape_[i]; | |||
| @@ -107,6 +112,8 @@ void TopKCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| bool TopKCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspaces, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kTopKInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kTopKOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, workspaces, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||