| @@ -13,16 +13,23 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/elu_grad_cpu_kernel.h" | |||
| #include <cmath> | |||
| #include <string> | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/elu_grad_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
namespace {
// Input address count that EluGradCPUKernel::Launch checks for.
constexpr size_t kEleGradInputsNum{2};
// Output address count that EluGradCPUKernel::Launch checks for.
constexpr size_t kEleGradOutputsNum{1};
}  // namespace
| void EluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (dtype_ != AnfAlgo::GetInputDeviceDataType(kernel_node, 1)) { | |||
| MS_LOG(EXCEPTION) << "Input0 and input1 must has the same data type"; | |||
| @@ -31,6 +38,8 @@ void EluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool EluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEleGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEleGradOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat) { | |||
| LaunchKernel<float>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat16) { | |||
| @@ -44,9 +53,9 @@ bool EluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, con | |||
| template <typename T> | |||
| void EluGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) const { | |||
| T *input0 = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input1 = reinterpret_cast<T *>(inputs[1]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const auto *input0 = reinterpret_cast<T *>(inputs[0]->addr); | |||
| const auto *input1 = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1; | |||
| auto task = [input0, input1, output](const size_t start, const size_t end) { | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELU_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELU_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,18 +13,31 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h" | |||
| #include <thread> | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "runtime/device/cpu/mpi/mpi_interface.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
namespace {
// Input address count that EmbeddingLookUpCommGradCPUKernel::Launch checks for.
constexpr size_t kEmbeddingLookupCommGradInputsNum{1};
// Output address count that EmbeddingLookUpCommGradCPUKernel::Launch checks for.
constexpr size_t kEmbeddingLookupCommGradOutputsNum{1};
}  // namespace
| void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| split_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "split_num"); | |||
| MS_LOG(INFO) << "split_num: " << split_num_; | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (split_num_ == 0) { | |||
| MS_LOG(EXCEPTION) << "The split_num_ must be larger than 0."; | |||
| } | |||
| if (input_shape.size() < 1) { | |||
| MS_LOG(EXCEPTION) << "The size of input's shape must be at least 1."; | |||
| } | |||
| if (input_shape[0] % split_num_ != 0) { | |||
| MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num."; | |||
| } | |||
| @@ -33,14 +46,16 @@ void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookupCommGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookupCommGradOutputsNum, kernel_name_); | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| auto start_time = std::chrono::steady_clock::now(); | |||
| #else | |||
| struct timeval start_time, end_time; | |||
| (void)gettimeofday(&start_time, nullptr); | |||
| #endif | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t input_size = inputs[0]->size; | |||
| size_t output_size = outputs[0]->size; | |||
| MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size; | |||
| @@ -67,12 +82,5 @@ bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressP | |||
| #endif | |||
| return true; | |||
| } | |||
| void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -33,7 +35,6 @@ class EmbeddingLookUpCommGradCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| int64_t split_num_; | |||
| }; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,9 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" | |||
| #include <thread> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "ir/primitive.h" | |||
| #include "common/thread_pool.h" | |||
| @@ -23,6 +24,11 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kBlockSize = 10000; | |||
| constexpr size_t kEmbeddingLookupInputsNum = 2; | |||
| constexpr size_t kEmbeddingLookupOutputsNum = 1; | |||
| constexpr size_t kEmbeddingLookupInputParamsMaxDim = 2; | |||
| template <typename T> | |||
| void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens, | |||
| size_t outer_dim_size, T offset, size_t first_dim_size) { | |||
| @@ -48,11 +54,13 @@ void LookUpTableTask(const float *input_addr, const T *indices_addr, float *outp | |||
| } // namespace | |||
| void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| node_wpt_ = kernel_node; | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.empty()) { | |||
| MS_LOG(EXCEPTION) << "Param must be at least 1D"; | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.empty() || input_shape.size() > kEmbeddingLookupInputParamsMaxDim) { | |||
| MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel support 1-" << kEmbeddingLookupInputParamsMaxDim | |||
| << "D input tensor, but got " << input_shape.size() << "D."; | |||
| } | |||
| first_dim_size_ = input_shape[0]; | |||
| outer_dim_size_ = 1; | |||
| @@ -74,11 +82,11 @@ template <typename T> | |||
| void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (!node_wpt_.expired()) { | |||
| auto node_ = node_wpt_.lock(); | |||
| if (!node_) { | |||
| auto node = node_wpt_.lock(); | |||
| if (!node) { | |||
| MS_LOG(EXCEPTION) << "node_wpt_ is expired."; | |||
| } | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); | |||
| if (input_shape.empty()) { | |||
| MS_LOG(EXCEPTION) << "Param must be at least 1D"; | |||
| } | |||
| @@ -89,15 +97,15 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr | |||
| } | |||
| indices_lens_ = 1; | |||
| std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1); | |||
| std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1); | |||
| for (const auto &shape : indices_shape) { | |||
| indices_lens_ *= shape; | |||
| } | |||
| } | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto indices_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t thread_num = indices_lens_ / 10000 + 1; | |||
| const auto *input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| const auto *indices_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t thread_num = indices_lens_ / kBlockSize + 1; | |||
| auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); | |||
| thread_num = thread_num > max_thread_num ? max_thread_num : thread_num; | |||
| std::vector<common::Task> tasks; | |||
| @@ -127,6 +135,8 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr | |||
| bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookupInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookupOutputsNum, kernel_name_); | |||
| if (indices_data_type_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else { | |||
| @@ -134,18 +144,5 @@ bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp | |||
| } | |||
| return true; | |||
| } | |||
| void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() | |||
| << ", but EmbeddingLookUpCPUKernel only support 4d or lower."; | |||
| } | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -31,11 +33,11 @@ class EmbeddingLookUpCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); | |||
| protected: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| int64_t offset_{0}; | |||
| size_t indices_lens_{1}; | |||
| size_t first_dim_size_{1}; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,18 +13,26 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void EqualCountCPUKernel::InitKernel(const CNodePtr &) {} | |||
namespace {
// Input address count that EqualCountCPUKernel::Launch checks for.
constexpr size_t kEqualCountInputsNum{2};
// Output address count that EqualCountCPUKernel::Launch checks for.
constexpr size_t kEqualCountOutputsNum{1};
}  // namespace
| void EqualCountCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| } | |||
| bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Input or output empty!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEqualCountInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEqualCountOutputsNum, kernel_name_); | |||
| if (inputs[0]->size != inputs[1]->size) { | |||
| MS_LOG(EXCEPTION) << "Input or output size!"; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -20,28 +20,27 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
namespace {
// Input address count that Expm1CPUKernel::Launch checks for.
constexpr size_t kExpm1InputsNum{1};
// Output address count that Expm1CPUKernel::Launch checks for.
constexpr size_t kExpm1OutputsNum{1};
}  // namespace
| void Expm1CPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but Expm1CPUKernel needs 1 inputs."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but Expm1CPUKernel needs 1 output."; | |||
| } | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (input_dtype_ != kNumberTypeFloat16 && input_dtype_ != kNumberTypeFloat32 && input_dtype_ != kNumberTypeFloat) { | |||
| if (input_dtype_ != kNumberTypeFloat16 && input_dtype_ != kNumberTypeFloat32) { | |||
| MS_LOG(EXCEPTION) << "Unsupported input type found."; | |||
| } | |||
| } | |||
| bool Expm1CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kExpm1InputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kExpm1OutputsNum, kernel_name_); | |||
| if (input_dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (input_dtype_ == kNumberTypeFloat32 || input_dtype_ == kNumberTypeFloat) { | |||
| } else if (input_dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernel<float>(inputs, outputs); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Only support float, half, but actual data type is " << TypeIdLabel(input_dtype_); | |||
| @@ -52,11 +51,9 @@ bool Expm1CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const | |||
| template <typename T> | |||
| void Expm1CPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| T *input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const auto *input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(T); | |||
| for (size_t i = 0; i < elem_num; i++) { | |||
| output[i] = exp(input[i]) - T(1); | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EXPM1_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EXPM1_CPU_KERNEL_H_ | |||
| @@ -45,8 +46,6 @@ MS_REG_CPU_KERNEL(Expm1, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutput | |||
| MS_REG_CPU_KERNEL(Expm1, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Expm1CPUKernel); | |||
| MS_REG_CPU_KERNEL(Expm1, KernelAttr().AddInputAttr(kNumberTypeFloat).AddOutputAttr(kNumberTypeFloat32), Expm1CPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/gather_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "nnacl/gather_parameter.h" | |||
| @@ -21,12 +22,23 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
namespace {
// Input address count that GatherV2CPUKernel::Launch checks for.
constexpr size_t kGatherInputsNum{2};
// Output address count that GatherV2CPUKernel::Launch checks for.
constexpr size_t kGatherOutputsNum{1};
// Largest rank of input 0 that InitKernel accepts before raising.
constexpr size_t kGatherInputParamsMaxDim{4};
}  // namespace
| template <typename T> | |||
| void GatherV2CPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (input_shape_.size() > kGatherInputParamsMaxDim) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape_.size() << ", but GatherV2CPUKernel olny support " | |||
| << kGatherInputParamsMaxDim << "D or lower."; | |||
| } | |||
| if (!is_dynamic_shape_) { | |||
| axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS); | |||
| } | |||
| @@ -36,9 +48,11 @@ template <typename T> | |||
| bool GatherV2CPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto input_tensor = reinterpret_cast<int8_t *>(inputs[0]->addr); | |||
| indices_data_ = reinterpret_cast<int32_t *>(inputs[1]->addr); | |||
| auto output_addr = reinterpret_cast<int8_t *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherOutputsNum, kernel_name_); | |||
| const auto *input_tensor = reinterpret_cast<int8_t *>(inputs[0]->addr); | |||
| const auto *indices_data = reinterpret_cast<int32_t *>(inputs[1]->addr); | |||
| auto *output_addr = reinterpret_cast<int8_t *>(outputs[0]->addr); | |||
| if (is_dynamic_shape_) { | |||
| axis_ = reinterpret_cast<int64_t *>(inputs[2]->addr)[0]; | |||
| } | |||
| @@ -51,13 +65,14 @@ bool GatherV2CPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| axis_ = axis_ + dims; | |||
| } | |||
| int max_thread_num = static_cast<int>(common::ThreadPool::GetInstance().GetSyncRunThreadNum()); | |||
| ParallelRun(input_tensor, output_addr, max_thread_num); | |||
| int max_thread_num = SizeToInt(common::ThreadPool::GetInstance().GetSyncRunThreadNum()); | |||
| ParallelRun(input_tensor, indices_data, output_addr, max_thread_num); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void GatherV2CPUKernel<T>::ParallelRun(int8_t *input_addr, int8_t *output_addr, int thread_num) { | |||
| void GatherV2CPUKernel<T>::ParallelRun(const int8_t *input_addr, const int *indices_data, int8_t *output_addr, | |||
| int thread_num) { | |||
| size_t outer_size = 1, inner_size = 1; | |||
| auto axis = static_cast<size_t>(axis_); | |||
| for (size_t i = 0; i < axis; ++i) { | |||
| @@ -76,12 +91,14 @@ void GatherV2CPUKernel<T>::ParallelRun(int8_t *input_addr, int8_t *output_addr, | |||
| int thread_index = 0; | |||
| while (thread_index < thread_num) { | |||
| int count = SizeToInt(MSMIN(stride, outer_size - stride * IntToSize(thread_index))); | |||
| if (count <= 0) break; | |||
| if (count <= 0) { | |||
| break; | |||
| } | |||
| auto thread_stride = static_cast<size_t>(stride * thread_index); | |||
| int8_t *in = input_addr + thread_stride * limit * inner_size * sizeof(T); | |||
| const int8_t *in = input_addr + thread_stride * limit * inner_size * sizeof(T); | |||
| int8_t *out = output_addr + thread_stride * indices_element_size * inner_size * sizeof(T); | |||
| auto block = [this, in, count, inner_size, limit, indices_element_size, out, thread_index]() { | |||
| int ret = Gather(in, count, inner_size, limit, indices_data_, indices_element_size, out, sizeof(T)); | |||
| auto block = [this, in, indices_data, count, inner_size, limit, indices_element_size, out, thread_index]() { | |||
| int ret = Gather(in, count, inner_size, limit, indices_data, indices_element_size, out, sizeof(T)); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "GatherRun error task_id[" << thread_index << "] error_code[" << ret << "]"; | |||
| return common::FAIL; | |||
| @@ -95,18 +112,5 @@ void GatherV2CPUKernel<T>::ParallelRun(int8_t *input_addr, int8_t *output_addr, | |||
| MS_LOG(EXCEPTION) << "SyncRun error!"; | |||
| } | |||
| } | |||
| template <typename T> | |||
| void GatherV2CPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num == 3) { | |||
| is_dynamic_shape_ = true; | |||
| MS_LOG(DEBUG) << " GatherV2CPUKernel running in Dynamic Mode."; | |||
| } else if (input_num == 2) { | |||
| MS_LOG(DEBUG) << " GatherV2CPUKernel running in Normal Mode."; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -35,12 +37,10 @@ class GatherV2CPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| void ParallelRun(int8_t *input_addr, int8_t *output_addr, int thread_num); | |||
| void ParallelRun(const int8_t *input_addr, const int *indices_data, int8_t *output_addr, int thread_num); | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> indices_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| int *indices_data_ = nullptr; | |||
| int64_t axis_{0}; | |||
| bool is_dynamic_shape_{false}; | |||
| }; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -19,6 +19,9 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kGatherDInputsNum = 3; | |||
| constexpr size_t kGatherDOutputsNum = 1; | |||
| size_t get_element_num(const std::vector<size_t> &shape) { | |||
| size_t size = 1; | |||
| for (size_t i = 0; i < shape.size(); i++) { | |||
| @@ -63,6 +66,8 @@ void CopyTask(size_t cur, std::vector<size_t> *pos, T *input, const I *index, co | |||
| template <typename T, typename I> | |||
| void GatherDCPUKernel<T, I>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||
| if (input_shape_.size() != index_shape_.size()) { | |||
| @@ -76,6 +81,8 @@ template <typename T, typename I> | |||
| bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherDInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherDOutputsNum, kernel_name_); | |||
| size_t input_size = get_element_num(input_shape_) * sizeof(T); | |||
| size_t index_size = get_element_num(index_shape_) * sizeof(I); | |||
| size_t dim_size = sizeof(int); | |||
| @@ -83,17 +90,15 @@ bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &input | |||
| if (inputs[0]->size != input_size || inputs[1]->size != dim_size || inputs[2]->size != index_size || | |||
| outputs[0]->size != output_size) { | |||
| MS_LOG(EXCEPTION) << "invalid input or output data size!"; | |||
| return false; | |||
| } | |||
| auto input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto dim = reinterpret_cast<int32_t *>(inputs[1]->addr); | |||
| auto index = reinterpret_cast<I *>(inputs[2]->addr); | |||
| auto *input = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *dim = reinterpret_cast<int32_t *>(inputs[1]->addr); | |||
| auto *index = reinterpret_cast<I *>(inputs[2]->addr); | |||
| auto output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| int32_t input_rank = SizeToInt(input_shape_.size()); | |||
| if (dim[0] >= input_rank || dim[0] < -input_rank) { | |||
| MS_LOG(EXCEPTION) << "The value of 'dim' should be in [" << -input_rank << ", " << input_rank | |||
| << "], but got: " << dim[0]; | |||
| return false; | |||
| } | |||
| if (dim[0] < 0) { | |||
| dim[0] = static_cast<int>(dim[0] + input_rank); | |||
| @@ -105,7 +110,6 @@ bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &input | |||
| if (index[i] >= max_index || index[i] < -max_index) { | |||
| MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index | |||
| << "], but got: " << index[i]; | |||
| return false; | |||
| } | |||
| if (index[i] < 0) { | |||
| index[i] = max_index + index[i]; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,8 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERD_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -52,4 +52,4 @@ MS_REG_CPU_KERNEL_T_S(GatherD, KernelAttr(), GatherDCPUKernel, bool, int64_t); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERD_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,12 +13,16 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kGatherDGradInputsNum = 2; | |||
| constexpr size_t kGatherDGradOutputsNum = 1; | |||
| size_t get_element_num(const std::vector<size_t> &shape) { | |||
| size_t size = 1; | |||
| for (size_t i = 0; i < shape.size(); i++) { | |||
| @@ -59,6 +63,8 @@ void GatherDGradCopyTask(size_t cur, std::vector<size_t> *pos, T *input, I *inde | |||
| template <typename I, typename T> | |||
| void GatherDGradCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| if (input_shape_ != index_shape_) { | |||
| @@ -72,25 +78,23 @@ template <typename I, typename T> | |||
| bool GatherDGradCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherDGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherDGradOutputsNum, kernel_name_); | |||
| size_t input_size = get_element_num(input_shape_) * sizeof(T); | |||
| size_t index_size = get_element_num(index_shape_) * sizeof(I); | |||
| size_t output_size = get_element_num(output_shape_) * sizeof(T); | |||
| if (inputs[0]->size != index_size || inputs[1]->size != input_size || outputs[0]->size != output_size) { | |||
| MS_LOG(EXCEPTION) << "invalid input or output data size!"; | |||
| return false; | |||
| } | |||
| auto index = reinterpret_cast<I *>(inputs[0]->addr); | |||
| auto input = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *index = reinterpret_cast<I *>(inputs[0]->addr); | |||
| auto *input = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| int output_rank = SizeToInt(output_shape_.size()); | |||
| if (axis_ >= output_rank || axis_ < -output_rank) { | |||
| MS_LOG(EXCEPTION) << "The value of 'axis_' should be in [" << -output_rank << ", " << output_rank | |||
| << "], but got: " << axis_; | |||
| return false; | |||
| } | |||
| if (axis_ < 0) { | |||
| axis_ = axis_ + SizeToInt(output_shape_.size()); | |||
| } | |||
| @@ -102,7 +106,6 @@ bool GatherDGradCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &i | |||
| if (index[i] >= max_index || index[i] < -max_index) { | |||
| MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index | |||
| << "], but got: " << index[i]; | |||
| return false; | |||
| } | |||
| if (index[i] < 0) { | |||
| index[i] = max_index + index[i]; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERDGRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERDGRAD_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHER_D_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -37,7 +39,7 @@ class GatherDGradCPUKernel : public CPUKernel { | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> index_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| int32_t axis_; | |||
| int32_t axis_{1}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T_S(GatherDGrad, KernelAttr(), GatherDGradCPUKernel, int32_t, int32_t); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,14 +13,23 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/gathernd_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #define MAX_INT (((unsigned int)(-1)) >> 1) | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| #define MAX_INT (((unsigned int)(-1)) >> 1) | |||
| constexpr size_t kGatherNdInputsNum = 2; | |||
| constexpr size_t kGatherNdOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void GatherNdCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| indices_shapes_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| output_shapes_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| @@ -35,6 +44,9 @@ void GatherNdCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| size_t dim_after_indices = 1; | |||
| size_t dim_indices_last = indices_shapes_[indices_shapes_.size() - IntToSize(1)]; | |||
| if (dim_indices_last == 0) { | |||
| MS_LOG(EXCEPTION) << "Value of indices_shapes_[" << indices_shapes_.size() << " - 1] should not be 0"; | |||
| } | |||
| for (size_t i = dim_indices_last; i < input_shapes_.size(); i++) { | |||
| dim_after_indices *= input_shapes_[i]; | |||
| } | |||
| @@ -61,8 +73,10 @@ template <typename T> | |||
| bool GatherNdCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kGatherNdInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kGatherNdOutputsNum, kernel_name_); | |||
| const auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| const auto *indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t output_dim0 = dims_[0]; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERND_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GATHERND_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -56,7 +58,6 @@ MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, uint32_t); | |||
| MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, uint64_t); | |||
| MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, float); | |||
| MS_REG_CPU_KERNEL_T(GatherNd, KernelAttr(), GatherNdCPUKernel, double); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -20,9 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kHSigmoidInputsNum = 1; | |||
| constexpr size_t kHSigmoidOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void HSigmoidCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| @@ -33,33 +39,28 @@ template <typename T> | |||
| bool HSigmoidCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSigmoidInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSigmoidOutputsNum, kernel_name_); | |||
| const auto *x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto y = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto zero = static_cast<T>(0); | |||
| auto one = static_cast<T>(1); | |||
| auto three = static_cast<T>(3); | |||
| auto six = static_cast<T>(6); | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3) { | |||
| y[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| y[i] = 1; | |||
| if (x[i] + three <= zero) { | |||
| y[i] = zero; | |||
| } else if (x[i] >= three) { | |||
| y[i] = one; | |||
| } else { | |||
| y[i] = (x[i] + 3) / 6; | |||
| y[i] = (x[i] + three) / six; | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSigmoidCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidCPUKernel needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -36,7 +37,6 @@ class HSigmoidCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| @@ -52,4 +52,4 @@ MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr(), HSigmoidCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL_T(HSigmoid, KernelAttr(), HSigmoidCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_CPU_KERNEL_H_ | |||
| @@ -20,9 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kHSigmoidGradInputsNum = 2; | |||
| constexpr size_t kHSigmoidGradOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void HSigmoidGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| @@ -33,32 +39,27 @@ template <typename T> | |||
| bool HSigmoidGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto dy = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSigmoidGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSigmoidGradOutputsNum, kernel_name_); | |||
| const auto *dy = reinterpret_cast<T *>(inputs[0]->addr); | |||
| const auto *x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto zero = static_cast<T>(0); | |||
| auto three = static_cast<T>(3); | |||
| auto six = static_cast<T>(6); | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3 || x[i] >= 3) { | |||
| out[i] = 0; | |||
| if (x[i] + three <= zero || x[i] >= three) { | |||
| out[i] = zero; | |||
| } else { | |||
| out[i] = dy[i] / 6; | |||
| out[i] = dy[i] / six; | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSigmoidGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSigmoidGradCPUKernel needs 2 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSigmoidGradCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -36,7 +37,6 @@ class HSigmoidGradCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| @@ -48,4 +48,4 @@ MS_REG_CPU_KERNEL_T(HSigmoidGrad, KernelAttr(), HSigmoidGradCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL_T(HSigmoidGrad, KernelAttr(), HSigmoidGradCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSIGMOID_GRAD_CPU_KERNEL_H_ | |||
| @@ -20,9 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kHSwishInputsNum = 1; | |||
| constexpr size_t kHSwishOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void HSwishCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| @@ -32,33 +38,27 @@ void HSwishCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| bool HSwishCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto y = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSwishInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSwishOutputsNum, kernel_name_); | |||
| const auto *x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *y = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto zero = static_cast<T>(0); | |||
| auto three = static_cast<T>(3); | |||
| auto six = static_cast<T>(6); | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3) { | |||
| y[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| if (x[i] + three <= zero) { | |||
| y[i] = zero; | |||
| } else if (x[i] >= three) { | |||
| y[i] = x[i]; | |||
| } else { | |||
| y[i] = x[i] * (x[i] + 3) / 6; | |||
| y[i] = x[i] * (x[i] + three) / six; | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSwishCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishCPUKernel needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwishCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -36,7 +37,6 @@ class HSwishCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| @@ -48,4 +48,4 @@ MS_REG_CPU_KERNEL_T(HSwish, KernelAttr(), HSwishCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL_T(HSwish, KernelAttr(), HSwishCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_CPU_KERNEL_H_ | |||
| @@ -20,9 +20,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kHSwishGradInputsNum = 2; | |||
| constexpr size_t kHSwishGradOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void HSwishGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| for (const uint64_t &d : x_shape_) { | |||
| tensor_size_ *= d; | |||
| @@ -33,34 +39,30 @@ template <typename T> | |||
| bool HSwishGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto dy = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kHSwishGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kHSwishGradOutputsNum, kernel_name_); | |||
| const auto *dy = reinterpret_cast<T *>(inputs[0]->addr); | |||
| const auto *x = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *out = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto zero = static_cast<T>(0); | |||
| auto two = static_cast<T>(2); | |||
| auto three = static_cast<T>(3); | |||
| auto six = static_cast<T>(6); | |||
| auto task = [&](size_t start, size_t end) { | |||
| for (uint64_t i = start; i < end; ++i) { | |||
| if (x[i] <= -3) { | |||
| out[i] = 0; | |||
| } else if (x[i] >= 3) { | |||
| if (x[i] + three <= zero) { | |||
| out[i] = zero; | |||
| } else if (x[i] >= three) { | |||
| out[i] = dy[i]; | |||
| } else { | |||
| out[i] = dy[i] * (2 * x[i] + 3) / 6; | |||
| out[i] = dy[i] * (two * x[i] + three) / six; | |||
| } | |||
| } | |||
| }; | |||
| CPUKernelUtils::ParallelFor(task, tensor_size_); | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void HSwishGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but HSwishGradCPUKernel needs 2 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but HSwishGradCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -36,7 +37,6 @@ class HSwishGradCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| uint64_t tensor_size_ = 1; | |||
| }; | |||
| @@ -48,4 +48,4 @@ MS_REG_CPU_KERNEL_T(HSwishGrad, KernelAttr(), HSwishGradCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL_T(HSwishGrad, KernelAttr(), HSwishGradCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_TILE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_HSWISH_GRAD_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,18 +21,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernelNode) { | |||
| MS_EXCEPTION_IF_NULL(kernelNode); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernelNode); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but IsFiniteCPUKernel needs 1 inputs."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernelNode); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but IsFiniteCPUKernel needs 1 output."; | |||
| } | |||
| namespace { | |||
| constexpr size_t kIsFiniteInputsNum = 1; | |||
| constexpr size_t kIsFiniteOutputsNum = 1; | |||
| } // namespace | |||
| input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernelNode, 0); | |||
| void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (dtype_map_.find(input_dtype_) == dtype_map_.end()) { | |||
| MS_LOG(EXCEPTION) << "Unsupported input type found."; | |||
| } | |||
| @@ -40,24 +37,24 @@ void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernelNode) { | |||
| bool IsFiniteCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kIsFiniteInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kIsFiniteOutputsNum, kernel_name_); | |||
| if (input_dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernelFloat16(inputs, outputs); | |||
| } else if (input_dtype_ == kNumberTypeFloat32 || input_dtype_ == kNumberTypeFloat) { | |||
| } else if (input_dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernelFloat<float>(inputs, outputs); | |||
| } else if (input_dtype_ == kNumberTypeFloat64) { | |||
| LaunchKernelFloat<double>(inputs, outputs); | |||
| } else if (dtype_map_.find(input_dtype_) != dtype_map_.end()) { | |||
| LaunchKernelOther(inputs, outputs); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Only support bool, int, uint, float, but actual data type is " << TypeIdLabel(input_dtype_); | |||
| LaunchKernelOther(inputs, outputs); | |||
| } | |||
| return true; | |||
| } | |||
| void IsFiniteCPUKernel::LaunchKernelFloat16(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) const { | |||
| float16 *input = reinterpret_cast<float16 *>(inputs[0]->addr); | |||
| bool *output = reinterpret_cast<bool *>(outputs[0]->addr); | |||
| const auto *input = reinterpret_cast<float16 *>(inputs[0]->addr); | |||
| auto *output = reinterpret_cast<bool *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(float16); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISFINITE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISFINITE_CPU_KERNEL_H_ | |||
| @@ -21,18 +21,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void IsNanCPUKernel::InitKernel(const CNodePtr &kernelNode) { | |||
| MS_EXCEPTION_IF_NULL(kernelNode); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernelNode); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but IsNanCPUKernel needs 1 inputs."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernelNode); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but IsNanCPUKernel needs 1 output."; | |||
| } | |||
| namespace { | |||
| constexpr size_t kIsNanInputsNum = 1; | |||
| constexpr size_t kIsNanOutputsNum = 1; | |||
| } // namespace | |||
| input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernelNode, 0); | |||
| void IsNanCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (dtype_map_.find(input_dtype_) == dtype_map_.end()) { | |||
| MS_LOG(EXCEPTION) << "Unsupported input type found."; | |||
| } | |||
| @@ -40,24 +37,24 @@ void IsNanCPUKernel::InitKernel(const CNodePtr &kernelNode) { | |||
| bool IsNanCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kIsNanInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kIsNanOutputsNum, kernel_name_); | |||
| if (input_dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernelFloat16(inputs, outputs); | |||
| } else if (input_dtype_ == kNumberTypeFloat32 || input_dtype_ == kNumberTypeFloat) { | |||
| } else if (input_dtype_ == kNumberTypeFloat32) { | |||
| LaunchKernelFloat<float>(inputs, outputs); | |||
| } else if (input_dtype_ == kNumberTypeFloat64) { | |||
| LaunchKernelFloat<double>(inputs, outputs); | |||
| } else if (dtype_map_.find(input_dtype_) != dtype_map_.end()) { | |||
| LaunchKernelOther(inputs, outputs); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Only support bool, int, uint, float, but actual data type is " << TypeIdLabel(input_dtype_); | |||
| LaunchKernelOther(inputs, outputs); | |||
| } | |||
| return true; | |||
| } | |||
| void IsNanCPUKernel::LaunchKernelFloat16(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| float16 *input = reinterpret_cast<float16 *>(inputs[0]->addr); | |||
| bool *output = reinterpret_cast<bool *>(outputs[0]->addr); | |||
| const auto *input = reinterpret_cast<float16 *>(inputs[0]->addr); | |||
| auto *output = reinterpret_cast<bool *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(float16); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISNAN_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ISNAN_CPU_KERNEL_H_ | |||
| @@ -19,9 +19,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kL2LossInputsNum = 1; | |||
| constexpr size_t kL2LossOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void L2LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| for (const size_t &d : x_shape) { | |||
| tensor_size_ *= d; | |||
| @@ -31,26 +37,16 @@ void L2LossCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
// Computes half of the sum of squares of the first tensor_size_ elements of
// inputs[0] and stores the scalar result in outputs[0].
//
// inputs   - inputs[0]->addr is read as an array of T.
// (unnamed) - workspace vector; not used.
// outputs  - outputs[0]->addr is written as a single T.
// Returns true on the visible path.
//
// The accumulation is done directly in *result_addr, i.e. in type T.
// NOTE(review): the two consecutive zero-initialisation lines are equivalent
// (C-style cast vs static_cast); this looks like a rendered diff showing the old
// and the new spelling side by side - confirm which one the real file keeps.
| template <typename T> | |||
| bool L2LossCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
// Argument-count checks; presumably these macros raise on a mismatch - their
// definition is not visible here.
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kL2LossInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kL2LossOutputsNum, kernel_name_); | |||
| auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto result_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| *result_addr = (T)0; | |||
| *result_addr = static_cast<T>(0); | |||
// Sum of squares over the flattened input.
| for (size_t i = 0; i < tensor_size_; i++) { | |||
| *result_addr += input_addr[i] * input_addr[i]; | |||
| } | |||
// Final result is sum(x^2) / 2.
| *result_addr = *result_addr / 2; | |||
| return true; | |||
| } | |||
// Reads the node's input and output tensor counts through AnfAlgo and logs at
// EXCEPTION level unless each of them equals 1.
//
// kernel_node - graph node whose tensor counts are inspected.
//
// NOTE(review): MS_LOG(EXCEPTION) presumably aborts by throwing; its definition
// is not visible here. The surrounding text looks like a rendered diff in which
// Launch now performs count checks via CHECK_KERNEL_*_NUM, so this helper may be
// the pre-change version - confirm before relying on it.
| template <typename T> | |||
| void L2LossCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but L2LossCPUKernel needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but L2LossCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_L2_LOSS_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_L2_LOSS_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -36,8 +37,8 @@ class L2LossCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| size_t tensor_size_{1}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| size_t tensor_size_ = 1; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(L2Loss, KernelAttr(), L2LossCPUKernel, float16); | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| @@ -20,8 +21,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// File-local tensor counts for the LayerNorm CPU kernel. LayerNormCPUKernel::Launch
// passes them to CHECK_KERNEL_INPUTS_NUM / CHECK_KERNEL_OUTPUTS_NUM.
| namespace { | |||
| constexpr size_t kLayerNormInputsNum = 3; | |||
| constexpr size_t kLayerNormOutputsNum = 3; | |||
| } // namespace | |||
| void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto begin_norm_axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "begin_norm_axis"); | |||
| @@ -48,12 +55,14 @@ void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
// Entry point of the LayerNorm CPU kernel: checks the argument counts and then
// dispatches to the LaunchKernel<T> instantiation selected by dtype_.
//
// inputs   - forwarded unchanged to LaunchKernel.
// (unnamed) - workspace vector; not used.
// outputs  - forwarded unchanged to LaunchKernel.
// Returns true when a supported dtype was dispatched; any other dtype is logged
// at EXCEPTION level.
//
// NOTE(review): kNumberTypeFloat64 is routed to LaunchKernel<float>. If the
// buffers really hold 64-bit values they would be reinterpreted as 32-bit floats
// - confirm whether float64 can actually reach this kernel.
// NOTE(review): the two MS_LOG(EXCEPTION) lines differ only by the trailing "!";
// this looks like a rendered diff showing old and new wording - confirm.
| bool LayerNormCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kLayerNormInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kLayerNormOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { | |||
| LaunchKernel<float>(inputs, outputs); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Input dtype only support float16, float32, float64!"; | |||
| MS_LOG(EXCEPTION) << "Input dtype only support float16, float32, float64"; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -111,16 +120,5 @@ void LayerNormCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con | |||
| } | |||
| (void)common::ThreadPool::GetInstance().SyncRun(tasks); | |||
| } | |||
// Reads the node's input and output tensor counts through AnfAlgo and logs at
// EXCEPTION level unless both equal 3.
//
// kernel_node - graph node whose tensor counts are inspected.
//
// NOTE(review): the output-count message has no space between "gets" and the
// number that follows it.
// NOTE(review): the surrounding text looks like a rendered diff in which Launch
// now checks counts via CHECK_KERNEL_*_NUM, so this helper may be the pre-change
// version - confirm.
| void LayerNormCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "LayerNormCPUKernel needs 3 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 3) { | |||
| MS_LOG(EXCEPTION) << "LayerNormCPUKernel expects 3 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,11 +35,10 @@ class LayerNormCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| float eps_{1e-12}; | |||
| size_t block_num_{1}; | |||
| @@ -21,8 +21,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// File-local tensor counts for the LayerNormGrad CPU kernel.
// LayerNormGradCPUKernel::Launch passes them to CHECK_KERNEL_INPUTS_NUM /
// CHECK_KERNEL_OUTPUTS_NUM; LaunchKernel indexes inputs[0..4] and outputs[0..2].
| namespace { | |||
| constexpr size_t kLayerNormGradInputsNum = 5; | |||
| constexpr size_t kLayerNormGradOutputsNum = 3; | |||
| } // namespace | |||
| void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| std::vector<size_t> x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto begin_norm_axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "begin_norm_axis"); | |||
| @@ -53,6 +59,8 @@ void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool LayerNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kLayerNormGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kLayerNormGradOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { | |||
| @@ -66,14 +74,14 @@ bool LayerNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input | |||
| template <typename T> | |||
| void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto dy = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto var = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto mean = reinterpret_cast<T *>(inputs[3]->addr); | |||
| auto gamma = reinterpret_cast<T *>(inputs[4]->addr); | |||
| auto dx = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto dg = reinterpret_cast<T *>(outputs[1]->addr); | |||
| auto db = reinterpret_cast<T *>(outputs[2]->addr); | |||
| auto *x = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *dy = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *var = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto *mean = reinterpret_cast<T *>(inputs[3]->addr); | |||
| auto *gamma = reinterpret_cast<T *>(inputs[4]->addr); | |||
| auto *dx = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto *dg = reinterpret_cast<T *>(outputs[1]->addr); | |||
| auto *db = reinterpret_cast<T *>(outputs[2]->addr); | |||
| size_t thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); | |||
| auto thread_num1 = param_num_ < thread_num ? param_num_ : thread_num; | |||
| std::vector<common::Task> tasks1; | |||
| @@ -121,7 +129,7 @@ void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| auto norm_shift = static_cast<int>(j / block_size_); | |||
| auto var_sqrt = (T)std::pow(static_cast<double>(var[norm_shift]) + eps_, -0.5); | |||
| auto dx1 = dy[j] * gamma[param_shift] * var_sqrt; | |||
| auto dx2 = sum1 * (T)2.0 / block_size_ * (x[j] - mean[norm_shift]); | |||
| auto dx2 = sum1 * (T)2.0 / (T)(block_size_) * (x[j] - mean[norm_shift]); | |||
| auto dx3 = ((T)(-1.0) * var_sqrt * sum2 + ((T)1.0 / (T)block_size_) * sum1 * sum3) * ((T)1.0 / (T)block_size_); | |||
| dx[j] = dx1 + dx2 + dx3; | |||
| } | |||
| @@ -144,16 +152,5 @@ void LayerNormGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| } | |||
| (void)common::ThreadPool::GetInstance().SyncRun(tasks2); | |||
| } | |||
// Reads the node's input and output tensor counts through AnfAlgo and logs at
// EXCEPTION level unless there are exactly 5 inputs and 3 outputs.
//
// kernel_node - graph node whose tensor counts are inspected.
//
// NOTE(review): the output-count message has no space between "gets" and the
// number that follows it.
// NOTE(review): the surrounding text looks like a rendered diff in which Launch
// now checks counts via CHECK_KERNEL_*_NUM, so this helper may be the pre-change
// version - confirm.
| void LayerNormGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 5) { | |||
| MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel needs 5 inputs, but gets " << input_num; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 3) { | |||
| MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel expects 3 output, but gets" << output_num; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,11 +35,10 @@ class LayerNormGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| float eps_{1e-12}; | |||
| size_t block_num_{1}; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -23,12 +23,17 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// File-local tensor counts for the MapCacheIdx CPU kernel.
// MapCacheIdxCPUKernel::Launch passes them to CHECK_KERNEL_INPUTS_NUM /
// CHECK_KERNEL_OUTPUTS_NUM.
| namespace { | |||
| constexpr size_t kMapCacheIdxInputsNum = 5; | |||
| constexpr size_t kMapCacheIdxOutputsNum = 4; | |||
| } // namespace | |||
| template <typename T> | |||
| int Compress(HashmapEntry<T> *entry_p, const size_t &length, T entry) { | |||
| T i = (entry + 1) % length; | |||
| int64_t off = 1; | |||
| T i = (entry + 1) % static_cast<T>(length); | |||
| T off = 1; | |||
| int compress_count = 0; | |||
| for (; !entry_p[i].IsEmpty(); i = (i + 1) % length, off++) { | |||
| for (; !entry_p[i].IsEmpty(); i = (i + 1) % static_cast<T>(length), off++) { | |||
| if (entry_p[i].tag_ > off) { | |||
| entry_p[entry].key_ = entry_p[i].key_; | |||
| entry_p[entry].value_ = entry_p[i].value_; | |||
| @@ -43,28 +48,29 @@ int Compress(HashmapEntry<T> *entry_p, const size_t &length, T entry) { | |||
| return compress_count; | |||
| } | |||
// Re-publishes the inferred output types and shapes of a node after the number
// of cache misses is known.
//
// miss_count - becomes the single dimension of the 1-D shape {miss_count}.
// node       - node whose output inference info is overwritten.
//
// Each output keeps the device dtype currently reported by
// AnfAlgo::GetOutputDeviceDataType. Output 0 keeps its present inferred shape;
// the following three shape slots are all set to {miss_count}.
// NOTE(review): the shape list has exactly four entries, so this presumably
// relies on the node having four outputs - confirm.
// NOTE(review): lines using `node_` and `node` appear in pairs; this looks like a
// rendered diff of a parameter rename - confirm which spelling the file keeps.
| void UpdateShape(size_t miss_count, const CNodePtr &node_) { | |||
| void UpdateShape(size_t miss_count, const CNodePtr &node) { | |||
| std::vector<size_t> out_shape; | |||
| (void)out_shape.emplace_back(miss_count); | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(node_); | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(node); | |||
| std::vector<TypeId> dtypes(output_num); | |||
| for (size_t i = 0; i < output_num; i++) { | |||
| dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i); | |||
| dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node, i); | |||
| } | |||
| AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node_, 0), out_shape, out_shape, out_shape}, | |||
| node_.get()); | |||
| AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node, 0), out_shape, out_shape, out_shape}, | |||
| node.get()); | |||
| } | |||
// Caches per-node information needed later by Launch.
//
// kernel_node - graph node this kernel instance is bound to.
//
// Stores the node name in kernel_name_, keeps the node in node_wpt_ (locked again
// in LaunchKernel), requires the inferred shape of input 0 to have exactly two
// dimensions, records its first dimension as hashmap_length_, and records the
// device dtype of input 0 in dtype_.
// NOTE(review): two zero-length checks appear back to back (`<= 0` logging at
// INFO, `== 0` logging at EXCEPTION) with a single closing brace; this looks like
// a rendered diff showing the old and new check - confirm which one is live.
| void MapCacheIdxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| node_wpt_ = kernel_node; | |||
| auto hashmap_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (hashmap_shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "Dimension of HashMap must be 2, (n, 4)"; | |||
| } | |||
// First dimension of the hashmap tensor is the number of entries; it is later
// used as a modulus, so it must not be zero.
| hashmap_length_ = hashmap_shape[0]; | |||
| if (hashmap_length_ <= 0) { | |||
| MS_LOG(INFO) << "Value of hashmap_length_ must > 0!"; | |||
| if (hashmap_length_ == 0) { | |||
| MS_LOG(EXCEPTION) << "Value of hashmap_length_ must > 0!"; | |||
| } | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| } | |||
| @@ -72,13 +78,14 @@ void MapCacheIdxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
// Entry point of the MapCacheIdx CPU kernel: checks the argument counts and
// dispatches to LaunchKernel<int> or LaunchKernel<int64_t> according to dtype_.
//
// inputs   - forwarded unchanged to LaunchKernel.
// (unnamed) - workspace vector; not used.
// outputs  - forwarded unchanged to LaunchKernel.
// Returns true after a supported dtype has been dispatched.
//
// NOTE(review): the unsupported-dtype branch shows both `MS_LOG(ERROR)` followed
// by `return false` and an `MS_LOG(EXCEPTION)` line; this looks like a rendered
// diff where the error return was replaced by an exception - confirm.
| bool MapCacheIdxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMapCacheIdxInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMapCacheIdxOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeInt64) { | |||
| LaunchKernel<int64_t>(inputs, outputs); | |||
| } else { | |||
| MS_LOG(ERROR) << "Only support int32, int64"; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "Only support int32, int64"; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -86,8 +93,8 @@ bool MapCacheIdxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| template <typename T> | |||
| void MapCacheIdxCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto node_ = node_wpt_.lock(); | |||
| auto emb_idx_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1); | |||
| auto node = node_wpt_.lock(); | |||
| auto emb_idx_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1); | |||
| batch_size_ = 1; | |||
| for (size_t i = 0; i < emb_idx_shape.size(); ++i) { | |||
| batch_size_ *= emb_idx_shape[i]; | |||
| @@ -157,8 +164,8 @@ void MapCacheIdxCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| tag_count++; | |||
| } | |||
| hashmap[entry].key_ = emb_idx; | |||
| hashmap[entry].step_ = SizeToLong(step_[0]); | |||
| hashmap[entry].tag_ = SizeToLong(tag_count); | |||
| hashmap[entry].step_ = step_[0]; | |||
| hashmap[entry].tag_ = static_cast<T>(tag_count); | |||
| T tmp_entry = (entry + 1) % static_cast<T>(hashmap_length_); | |||
| size_t delete_count = 1; | |||
| while (hashmap[tmp_entry].IsEmpty() || hashmap[tmp_entry].IsUsing(step_[0])) { | |||
| @@ -184,7 +191,7 @@ void MapCacheIdxCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| for (size_t i = 0; i < miss_count; ++i) { | |||
| output_cache_idx[miss_idx[i]] = output_swap_cache_idx[i]; | |||
| } | |||
| UpdateShape(miss_count, node_); | |||
| UpdateShape(miss_count, node); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_CACHE_IDX_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_CACHE_IDX_CPU_KERNEL_H_ | |||
| @@ -35,10 +36,10 @@ class MapCacheIdxCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); | |||
| private: | |||
| size_t batch_size_{1}; | |||
| size_t hashmap_length_{1}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -22,21 +22,28 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// File-local tensor counts for the MapUniform CPU kernel.
// MapUniformCPUKernel::Launch passes them to CHECK_KERNEL_INPUTS_NUM /
// CHECK_KERNEL_OUTPUTS_NUM; LaunchKernel reads inputs[1], inputs[2] and outputs[0].
| namespace { | |||
| constexpr size_t kMapUniformInputsNum = 3; | |||
| constexpr size_t kMapUniformOutputsNum = 1; | |||
| } // namespace | |||
// Caches per-node information needed later by Launch.
//
// kernel_node - graph node this kernel instance is bound to; checked with
//               MS_EXCEPTION_IF_NULL before use.
//
// Stores the node name in kernel_name_, keeps the node in node_wpt_ (locked again
// in LaunchKernel to query the input shape), and records the device dtype of
// input 0 in dtype_, which Launch uses to pick the template instantiation.
| void MapUniformCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| node_wpt_ = kernel_node; | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| } | |||
// Entry point of the MapUniform CPU kernel: checks the argument counts and
// dispatches to LaunchKernel<int> or LaunchKernel<int64_t> according to dtype_.
//
// inputs   - forwarded unchanged to LaunchKernel.
// (unnamed) - workspace vector; not used.
// outputs  - forwarded unchanged to LaunchKernel.
// Returns true after a supported dtype has been dispatched.
//
// NOTE(review): the unsupported-dtype branch shows both `MS_LOG(ERROR)` followed
// by `return false` and an `MS_LOG(EXCEPTION)` line; this looks like a rendered
// diff where the error return was replaced by an exception - confirm.
| bool MapUniformCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMapUniformInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMapUniformOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeInt64) { | |||
| LaunchKernel<int64_t>(inputs, outputs); | |||
| } else { | |||
| MS_LOG(ERROR) << "Only support int32, int64"; | |||
| return false; | |||
| MS_LOG(EXCEPTION) << "Only support int32, int64"; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -44,11 +51,11 @@ bool MapUniformCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| template <typename T> | |||
| void MapUniformCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto node_ = node_wpt_.lock(); | |||
| if (!node_) { | |||
| auto node = node_wpt_.lock(); | |||
| if (!node) { | |||
| MS_LOG(EXCEPTION) << "node_wpt_ is expired."; | |||
| } | |||
| auto input_x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); | |||
| auto input_x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); | |||
| batch_size_ = 1; | |||
| for (size_t i = 0; i < input_x_shape.size(); ++i) { | |||
| batch_size_ *= input_x_shape[i]; | |||
| @@ -58,6 +65,9 @@ void MapUniformCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| auto per_group_size = *reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto group_num = *reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto output_x = reinterpret_cast<T *>(outputs[0]->addr); | |||
| if (group_num <= 0) { | |||
| MS_LOG(EXCEPTION) << "Group num should be greater than 0"; | |||
| } | |||
| T max_num = group_num * per_group_size; | |||
| for (size_t i = 0; i < batch_size_; ++i) { | |||
| output_x[i] = input_x[i] % group_num * per_group_size + input_x[i] / group_num; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_UNIFORM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAP_UNIFORM_CPU_KERNEL_H_ | |||
| @@ -35,10 +36,10 @@ class MapUniformCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); | |||
| private: | |||
| size_t batch_size_{1}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| CNodeWeakPtr node_wpt_; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -19,9 +19,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// File-local tensor counts for the Maximum CPU kernel. MaximumCPUKernel<T>::Launch
// passes them to CHECK_KERNEL_INPUTS_NUM / CHECK_KERNEL_OUTPUTS_NUM before reading
// inputs[0], inputs[1] and outputs[0].
| namespace { | |||
| constexpr size_t kMaximumInputsNum = 2; | |||
| constexpr size_t kMaximumOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void MaximumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| input_y_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| @@ -42,18 +48,6 @@ void MaximumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| } | |||
| } | |||
// Reads the node's input and output tensor counts through AnfAlgo and logs at
// EXCEPTION level unless there are exactly 2 inputs and 1 output.
//
// kernel_node - graph node whose tensor counts are inspected.
//
// NOTE(review): the surrounding text looks like a rendered diff in which Launch
// now checks counts via CHECK_KERNEL_*_NUM, so this helper may be the pre-change
// version - confirm.
| template <typename T> | |||
| void MaximumCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaximumCPUKernel needs 2 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaximumCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| template <typename T> | |||
| void MaximumCPUKernel<T>::InitInputTensorAndScalar(size_t max_input_shape_size) { | |||
| if (max_input_shape_size != output_shape_.size()) { | |||
| @@ -77,6 +71,8 @@ void MaximumCPUKernel<T>::InitInputTensors(TypeId input_x_dtype, TypeId input_y_ | |||
| template <typename T> | |||
| bool MaximumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaximumInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaximumOutputsNum, kernel_name_); | |||
| T *input_x_ = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input_y_ = reinterpret_cast<T *>(inputs[1]->addr); | |||
| T *output_ = reinterpret_cast<T *>(outputs[0]->addr); | |||
| @@ -85,7 +81,7 @@ bool MaximumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| } | |||
| template <typename T> | |||
| void MaximumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) { | |||
| void MaximumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) const { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| @@ -108,7 +104,7 @@ void MaximumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T * | |||
| } | |||
| template <typename T> | |||
| bool MaximumCPUKernel<T>::IsBroadcast() { | |||
| bool MaximumCPUKernel<T>::IsBroadcast() const { | |||
| if (input_x_shape_.size() != input_y_shape_.size()) { | |||
| return true; | |||
| } | |||
| @@ -122,12 +118,12 @@ bool MaximumCPUKernel<T>::IsBroadcast() { | |||
| template <typename T> | |||
| void MaximumCPUKernel<T>::InitTensorBroadcastShape() { | |||
| if (output_shape_.size() > max_dims) { | |||
| if (output_shape_.size() > max_dims_) { | |||
| MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7"; | |||
| } | |||
| broadcast_input_x_shape_.resize(max_dims, 1); | |||
| broadcast_input_y_shape_.resize(max_dims, 1); | |||
| broadcast_output_shape_.resize(max_dims, 1); | |||
| broadcast_input_x_shape_.resize(max_dims_, 1); | |||
| broadcast_input_y_shape_.resize(max_dims_, 1); | |||
| broadcast_output_shape_.resize(max_dims_, 1); | |||
| for (size_t i = 0; i < output_shape_.size(); i++) { | |||
| broadcast_output_shape_[i] = output_shape_[i]; | |||
| } | |||
| @@ -147,7 +143,7 @@ void MaximumCPUKernel<T>::InitTensorBroadcastShape() { | |||
| // Broadcast comparison | |||
// Maps a coordinate along one axis to the coordinate to read from an operand
// whose extent on that axis is `dim`.
//
// index - coordinate along the axis.
// dim   - extent of the operand on that axis.
// Returns 0 when dim == 1, otherwise returns index unchanged, so an axis of
// extent 1 always resolves to its only element.
//
// NOTE(review): the signature appears twice, without and with `const`; this looks
// like a rendered diff of a const-qualification change - confirm.
| template <typename T> | |||
| size_t MaximumCPUKernel<T>::Index(const size_t &index, const size_t &dim) { | |||
| size_t MaximumCPUKernel<T>::Index(const size_t &index, const size_t &dim) const { | |||
| return dim == 1 ? 0 : index; | |||
| } | |||
| @@ -158,10 +154,7 @@ void MaximumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1, | |||
| const size_t r1, const size_t r2, const size_t r3, const size_t r4, | |||
| const size_t r5, const size_t r6, const size_t d0, const size_t d1, | |||
| const size_t d2, const size_t d3, const size_t d4, const size_t d5, | |||
| const size_t d6, const T *input_x, const T *input_y, T *output) { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| const size_t d6, const T *input_x, const T *input_y, T *output) const { | |||
| for (size_t pos = 0; pos < output_num_; pos++) { | |||
| size_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0; | |||
| size_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1; | |||
| @@ -190,10 +183,7 @@ void MaximumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1, | |||
| } | |||
| template <typename T> | |||
| void MaximumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| void MaximumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const { | |||
| if (input_x_shape_.size() == 0) { | |||
| for (size_t i = 0; i < output_num_; ++i) { | |||
| output[i] = MaximumFunc(input_x[0], input_y[i]); | |||
| @@ -206,10 +196,7 @@ void MaximumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, con | |||
| } | |||
| template <typename T> | |||
| void MaximumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| void MaximumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const { | |||
| for (size_t i = 0; i < output_num_; ++i) { | |||
| output[i] = MaximumFunc(input_x[i], input_y[i]); | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_CPU_KERNEL_H_ | |||
| @@ -34,11 +35,9 @@ class MaximumCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| bool IsBroadcast(); | |||
| bool IsBroadcast() const; | |||
| size_t Index(const size_t &index, const size_t &dim); | |||
| size_t Index(const size_t &index, const size_t &dim) const; | |||
| void InitTensorBroadcastShape(); | |||
| @@ -51,15 +50,15 @@ class MaximumCPUKernel : public CPUKernel { | |||
| const size_t l5, const size_t l6, const size_t r0, const size_t r1, const size_t r2, | |||
| const size_t r3, const size_t r4, const size_t r5, const size_t r6, const size_t d0, | |||
| const size_t d1, const size_t d2, const size_t d3, const size_t d4, const size_t d5, | |||
| const size_t d6, const T *input_x, const T *input_y, T *output); | |||
| const size_t d6, const T *input_x, const T *input_y, T *output) const; | |||
| T MaximumFunc(const T &lhs, const T &rhs) { return lhs > rhs ? lhs : rhs; } | |||
| T MaximumFunc(const T &lhs, const T &rhs) const { return lhs > rhs ? lhs : rhs; } | |||
| void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output); | |||
| void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const; | |||
| void BroadcastArithTensors(const T *input_x, const T *input_y, T *output); | |||
| void BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const; | |||
| void BroadcastArith(const T *input_x, const T *input_y, T *output); | |||
| void BroadcastArith(const T *input_x, const T *input_y, T *output) const; | |||
| private: | |||
| bool need_broadcast_{false}; | |||
| @@ -72,7 +71,7 @@ class MaximumCPUKernel : public CPUKernel { | |||
| std::vector<size_t> broadcast_input_x_shape_; | |||
| std::vector<size_t> broadcast_input_y_shape_; | |||
| std::vector<size_t> broadcast_output_shape_; | |||
| const size_t max_dims{7}; | |||
| const size_t max_dims_{7}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(Maximum, KernelAttr(), MaximumCPUKernel, int32_t); | |||
| @@ -84,4 +83,4 @@ MS_REG_CPU_KERNEL_T(Maximum, KernelAttr(), MaximumCPUKernel, double); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UPDATE_CACHE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,6 +21,9 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kMaximumGradInputsNum = 3; | |||
| constexpr size_t kMaximumGradOutputsNum = 2; | |||
| void CheckShape(std::vector<size_t> *shape) { | |||
| MS_EXCEPTION_IF_NULL(shape); | |||
| if (shape->empty()) { | |||
| @@ -30,7 +33,8 @@ void CheckShape(std::vector<size_t> *shape) { | |||
| } // namespace | |||
| void MaximumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| y_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| dout_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -45,6 +49,8 @@ void MaximumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool MaximumGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaximumGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaximumGradOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeUInt32) { | |||
| @@ -57,6 +63,8 @@ bool MaximumGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| LaunchKernel<uint64_t>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat64) { | |||
| LaunchKernel<double>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } | |||
| return true; | |||
| } | |||
| @@ -145,16 +153,5 @@ void MaximumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, c | |||
| MaximumGradRecTask<T>(x_addr, y_addr, dout_addr, dx_addr, dy_addr, 0, 0, 0, 0, x_cargo, y_cargo, dout_cargo, x_shape, | |||
| y_shape, dout_shape); | |||
| } | |||
| void MaximumGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaximumGradCPUKernel needs 3 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaximumGradCPUKernel needs 2 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -14,8 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUMGRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUMGRAD_CPU_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,11 +35,10 @@ class MaximumGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| std::vector<size_t> y_shape_; | |||
| std::vector<size_t> dout_shape; | |||
| @@ -50,4 +50,5 @@ class MaximumGradCPUKernel : public CPUKernel { | |||
| MS_REG_CPU_KERNEL(MaximumGrad, KernelAttr(), MaximumGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MaximumGrad_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MAXIMUM_GRAD_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -19,9 +19,15 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kMinimumInputsNum = 2; | |||
| constexpr size_t kMinimumOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| input_y_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| @@ -42,18 +48,6 @@ void MinimumCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| } | |||
| } | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MinimumCPUKernel needs 2 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MinimumCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::InitInputTensorAndScalar(size_t max_input_shape_size) { | |||
| if (max_input_shape_size != output_shape_.size()) { | |||
| @@ -77,6 +71,8 @@ void MinimumCPUKernel<T>::InitInputTensors(TypeId input_x_dtype, TypeId input_y_ | |||
| template <typename T> | |||
| bool MinimumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMinimumInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMinimumOutputsNum, kernel_name_); | |||
| T *input_x_ = reinterpret_cast<T *>(inputs[0]->addr); | |||
| T *input_y_ = reinterpret_cast<T *>(inputs[1]->addr); | |||
| T *output_ = reinterpret_cast<T *>(outputs[0]->addr); | |||
| @@ -85,7 +81,7 @@ bool MinimumCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| } | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) { | |||
| void MinimumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T *output) const { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| @@ -108,7 +104,7 @@ void MinimumCPUKernel<T>::BroadcastArith(const T *input_x, const T *input_y, T * | |||
| } | |||
| template <typename T> | |||
| bool MinimumCPUKernel<T>::IsBroadcast() { | |||
| bool MinimumCPUKernel<T>::IsBroadcast() const { | |||
| if (input_x_shape_.size() != input_y_shape_.size()) { | |||
| return true; | |||
| } | |||
| @@ -122,12 +118,12 @@ bool MinimumCPUKernel<T>::IsBroadcast() { | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::InitTensorBroadcastShape() { | |||
| if (output_shape_.size() > max_dims) { | |||
| if (output_shape_.size() > max_dims_) { | |||
| MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7"; | |||
| } | |||
| broadcast_input_x_shape_.resize(max_dims, 1); | |||
| broadcast_input_y_shape_.resize(max_dims, 1); | |||
| broadcast_output_shape_.resize(max_dims, 1); | |||
| broadcast_input_x_shape_.resize(max_dims_, 1); | |||
| broadcast_input_y_shape_.resize(max_dims_, 1); | |||
| broadcast_output_shape_.resize(max_dims_, 1); | |||
| for (size_t i = 0; i < output_shape_.size(); i++) { | |||
| broadcast_output_shape_[i] = output_shape_[i]; | |||
| } | |||
| @@ -147,7 +143,7 @@ void MinimumCPUKernel<T>::InitTensorBroadcastShape() { | |||
| // Broadcast comparison | |||
| template <typename T> | |||
| size_t MinimumCPUKernel<T>::Index(const size_t &index, const size_t &dim) { | |||
| size_t MinimumCPUKernel<T>::Index(const size_t &index, const size_t &dim) const { | |||
| return dim == 1 ? 0 : index; | |||
| } | |||
| @@ -158,10 +154,7 @@ void MinimumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1, | |||
| const size_t r1, const size_t r2, const size_t r3, const size_t r4, | |||
| const size_t r5, const size_t r6, const size_t d0, const size_t d1, | |||
| const size_t d2, const size_t d3, const size_t d4, const size_t d5, | |||
| const size_t d6, const T *input_x, const T *input_y, T *output) { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| const size_t d6, const T *input_x, const T *input_y, T *output) const { | |||
| for (size_t pos = 0; pos < output_num_; pos++) { | |||
| size_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0; | |||
| size_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1; | |||
| @@ -190,10 +183,7 @@ void MinimumCPUKernel<T>::BroadcastArithKernel(const size_t l0, const size_t l1, | |||
| } | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| void MinimumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const { | |||
| if (input_x_shape_.size() == 0) { | |||
| for (size_t i = 0; i < output_num_; ++i) { | |||
| output[i] = MinimumFunc(input_x[0], input_y[i]); | |||
| @@ -206,10 +196,7 @@ void MinimumCPUKernel<T>::BroadcastArithOneScalarOneTensor(const T *input_x, con | |||
| } | |||
| template <typename T> | |||
| void MinimumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) { | |||
| MS_EXCEPTION_IF_NULL(input_x); | |||
| MS_EXCEPTION_IF_NULL(input_y); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| void MinimumCPUKernel<T>::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const { | |||
| for (size_t i = 0; i < output_num_; ++i) { | |||
| output[i] = MinimumFunc(input_x[i], input_y[i]); | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUM_CPU_KERNEL_H_ | |||
| @@ -34,11 +35,9 @@ class MinimumCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| bool IsBroadcast(); | |||
| bool IsBroadcast() const; | |||
| size_t Index(const size_t &index, const size_t &dim); | |||
| size_t Index(const size_t &index, const size_t &dim) const; | |||
| void InitTensorBroadcastShape(); | |||
| @@ -51,15 +50,15 @@ class MinimumCPUKernel : public CPUKernel { | |||
| const size_t l5, const size_t l6, const size_t r0, const size_t r1, const size_t r2, | |||
| const size_t r3, const size_t r4, const size_t r5, const size_t r6, const size_t d0, | |||
| const size_t d1, const size_t d2, const size_t d3, const size_t d4, const size_t d5, | |||
| const size_t d6, const T *input_x, const T *input_y, T *output); | |||
| const size_t d6, const T *input_x, const T *input_y, T *output) const; | |||
| T MinimumFunc(const T &lhs, const T &rhs) { return lhs < rhs ? lhs : rhs; } | |||
| T MinimumFunc(const T &lhs, const T &rhs) const { return lhs < rhs ? lhs : rhs; } | |||
| void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output); | |||
| void BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) const; | |||
| void BroadcastArithTensors(const T *input_x, const T *input_y, T *output); | |||
| void BroadcastArithTensors(const T *input_x, const T *input_y, T *output) const; | |||
| void BroadcastArith(const T *input_x, const T *input_y, T *output); | |||
| void BroadcastArith(const T *input_x, const T *input_y, T *output) const; | |||
| private: | |||
| bool need_broadcast_{false}; | |||
| @@ -72,7 +71,7 @@ class MinimumCPUKernel : public CPUKernel { | |||
| std::vector<size_t> broadcast_input_x_shape_; | |||
| std::vector<size_t> broadcast_input_y_shape_; | |||
| std::vector<size_t> broadcast_output_shape_; | |||
| const size_t max_dims{7}; | |||
| const size_t max_dims_{7}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(Minimum, KernelAttr(), MinimumCPUKernel, int32_t); | |||
| @@ -84,4 +83,4 @@ MS_REG_CPU_KERNEL_T(Minimum, KernelAttr(), MinimumCPUKernel, double); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UPDATE_CACHE_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUM_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -21,6 +21,9 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kMinimumGradInputsNum = 3; | |||
| constexpr size_t kMinimumGradOutputsNum = 2; | |||
| void GetCargo(std::vector<size_t> *cargo, const std::vector<size_t> &shape, const std::vector<size_t> &dout_shape) { | |||
| int i = dout_shape.size() - 1; | |||
| int j = shape.size() - 1; | |||
| @@ -58,7 +61,8 @@ void CheckShape(std::vector<size_t> *shape) { | |||
| } // namespace | |||
| void MinimumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| y_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| dout_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| @@ -73,6 +77,8 @@ void MinimumGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool MinimumGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMinimumGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMinimumGradOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| LaunchKernel<int>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeUInt32) { | |||
| @@ -115,11 +121,11 @@ void MinimumGradRecTask(const T *x, const T *y, const T *dout, T *dx, T *dy, con | |||
| template <typename T> | |||
| void MinimumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| auto x_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto y_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto dout_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto dx_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto dy_addr = reinterpret_cast<T *>(outputs[1]->addr); | |||
| auto *x_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *y_addr = reinterpret_cast<T *>(inputs[1]->addr); | |||
| auto *dout_addr = reinterpret_cast<T *>(inputs[2]->addr); | |||
| auto *dx_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto *dy_addr = reinterpret_cast<T *>(outputs[1]->addr); | |||
| size_t x_tensor_len = GetTensorLen(x_shape_); | |||
| size_t y_tensor_len = GetTensorLen(y_shape_); | |||
| @@ -146,16 +152,5 @@ void MinimumGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, c | |||
| MinimumGradRecTask<T>(x_addr, y_addr, dout_addr, dx_addr, dy_addr, 0, 0, 0, 0, x_cargo, y_cargo, dout_cargo, x_shape, | |||
| y_shape, dout_shape); | |||
| } | |||
| void MinimumGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 3) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MinimumGradCPUKernel needs 3 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MinimumGradCPUKernel needs 2 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUMGRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUMGRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,11 +35,10 @@ class MinimumGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> x_shape_; | |||
| std::vector<size_t> y_shape_; | |||
| std::vector<size_t> dout_shape; | |||
| @@ -50,4 +50,4 @@ class MinimumGradCPUKernel : public CPUKernel { | |||
| MS_REG_CPU_KERNEL(MinimumGrad, KernelAttr(), MinimumGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MinimumGrad_CPU_KERNEL_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MINIMUMGRAD_CPU_KERNEL_H_ | |||
| @@ -33,9 +33,13 @@ constexpr int TOP = 0; | |||
| constexpr int BOTTOM = 1; | |||
| constexpr int LEFT = 0; | |||
| constexpr int RIGHT = 1; | |||
| constexpr size_t kMirrorPadInputsNum = 2; | |||
| constexpr size_t kMirrorPadOutputsNum = 1; | |||
| } // namespace | |||
| void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::string mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "mode"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (mode == "REFLECT") { | |||
| @@ -50,12 +54,10 @@ void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ == 4) { // shape adjustment from 2d/3d to 4d | |||
| } else if (shape_size_ == 3) { | |||
| auto it = input_shape.begin(); | |||
| input_shape.insert(it, 1); // batch padding | |||
| (void)input_shape.insert(input_shape.begin(), 1); // batch padding | |||
| shape_size_ = 4; | |||
| } else if (shape_size_ == 2) { | |||
| auto it = input_shape.begin(); | |||
| input_shape.insert(it, 2, 1); // channel padding | |||
| (void)input_shape.insert(input_shape.begin(), 2, 1); // channel padding | |||
| shape_size_ = 4; | |||
| } | |||
| @@ -63,6 +65,7 @@ void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| tensor_size_ *= input_shape[i]; | |||
| input_shape_.push_back(SizeToLong(input_shape[i])); | |||
| } | |||
| std::vector<size_t> padding_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| num_paddings_ = SizeToLong(padding_shape[0]); | |||
| @@ -74,6 +77,7 @@ void MirrorPadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| int64_t max_width = input_shape_[3]; | |||
| int64_t max_height = input_shape_[2]; | |||
| if (mode_ == 1) { // symmetric | |||
| max_width = max_width + (2 * max_width); | |||
| max_height = max_height + (2 * max_height); | |||
| @@ -97,6 +101,8 @@ void extract_paddings(const int64_t *paddings_arg, int64_t padd_dim, int64_t *ex | |||
| bool MirrorPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMirrorPadInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMirrorPadOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -112,7 +118,8 @@ bool MirrorPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, c | |||
| } | |||
| template <typename T> | |||
| void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &outputs) const { | |||
| auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| int64_t *paddings_arg = reinterpret_cast<int64_t *>(inputs[1]->addr); | |||
| auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| @@ -126,6 +133,7 @@ void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con | |||
| const int64_t padded_height = output_shape_[dim_offset]; | |||
| const int64_t padded_width = output_shape_[dim_offset + 1]; | |||
| const int64_t padd_dim = num_paddings_; | |||
| const int64_t mode = mode_; | |||
| int64_t paddings[MAX_PADDINGS * PADDING_SIZE]; // local and fixed size to keep in registers | |||
| @@ -190,16 +198,5 @@ void MirrorPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, con | |||
| outputs_addr[pos] = inputs_addr[pos_index]; | |||
| } | |||
| } | |||
| void MirrorPadCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MirrorPadCPUKernel needs 2 inputs."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MirrorPadCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -35,19 +36,18 @@ class MirrorPadCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| size_t tensor_size_ = 1; | |||
| size_t shape_size_; | |||
| size_t output_size_ = 1; | |||
| size_t tensor_size_{1}; | |||
| size_t shape_size_{0}; | |||
| size_t output_size_{1}; | |||
| int64_t mode_{0}; | |||
| int64_t num_paddings_{0}; | |||
| std::vector<int64_t> input_shape_; | |||
| std::vector<int64_t> output_shape_; | |||
| int64_t mode_; | |||
| int64_t num_paddings_; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| @@ -33,8 +33,28 @@ constexpr int TOP = 0; | |||
| constexpr int BOTTOM = 1; | |||
| constexpr int LEFT = 0; | |||
| constexpr int RIGHT = 1; | |||
| constexpr size_t kMirrorPadGradInputsNum = 2; | |||
| constexpr size_t kMirrorPadGradOutputsNum = 1; | |||
| void extract_paddings(const int64_t *paddings_arg, int64_t padd_dim, int64_t *extracted_paddings) { | |||
| const int64_t paddings_offset = MAX_PADDINGS - padd_dim; | |||
| for (int64_t i = 0; i < padd_dim; i++) { | |||
| extracted_paddings[(paddings_offset + i) * PADDING_SIZE] = paddings_arg[i * PADDING_SIZE]; | |||
| extracted_paddings[(paddings_offset + i) * PADDING_SIZE + 1] = paddings_arg[i * PADDING_SIZE + 1]; | |||
| } | |||
| } | |||
| bool range_check(int64_t x, int64_t y, int64_t padded_width, int64_t padded_height) { | |||
| if (((x >= 0) && (x <= padded_width - 1)) && ((y >= 0) && (y <= padded_height - 1))) { | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| } // namespace | |||
| void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::string mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, "mode"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| if (mode == "REFLECT") { | |||
| @@ -49,12 +69,10 @@ void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ == 4) { // shape adjustment from 2d/3d to 4d | |||
| } else if (shape_size_ == 3) { | |||
| auto it = input_shape.begin(); | |||
| input_shape.insert(it, 1); // batch padding | |||
| (void)input_shape.insert(input_shape.begin(), 1); // batch padding | |||
| shape_size_ = 4; | |||
| } else if (shape_size_ == 2) { | |||
| auto it = input_shape.begin(); | |||
| input_shape.insert(it, 2, 1); // channel padding | |||
| (void)input_shape.insert(input_shape.begin(), 2, 1); // channel padding | |||
| shape_size_ = 4; | |||
| } | |||
| @@ -70,11 +88,9 @@ void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| if (output_shape.size() == 4) { | |||
| } else if (output_shape.size() == 3) { | |||
| auto it = output_shape.begin(); | |||
| output_shape.insert(it, 1); // batch padding | |||
| (void)output_shape.insert(output_shape.begin(), 1); // batch padding | |||
| } else if (output_shape.size() == 2) { | |||
| auto it = output_shape.begin(); | |||
| output_shape.insert(it, 2, 1); // channel padding | |||
| (void)output_shape.insert(output_shape.begin(), 2, 1); // channel padding | |||
| } | |||
| for (auto x : output_shape) { | |||
| output_size_ *= x; | |||
| @@ -103,24 +119,11 @@ void MirrorPadGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| } | |||
| } | |||
| void extract_paddings_(const int64_t *paddings_arg, int64_t padd_dim, int64_t *extracted_paddings) { | |||
| const int64_t paddings_offset = MAX_PADDINGS - padd_dim; | |||
| for (int64_t i = 0; i < padd_dim; i++) { | |||
| extracted_paddings[(paddings_offset + i) * PADDING_SIZE] = paddings_arg[i * PADDING_SIZE]; | |||
| extracted_paddings[(paddings_offset + i) * PADDING_SIZE + 1] = paddings_arg[i * PADDING_SIZE + 1]; | |||
| } | |||
| } | |||
| bool range_check(int64_t x, int64_t y, int64_t padded_width, int64_t padded_height) { | |||
| if (((x >= 0) && (x <= padded_width - 1)) && ((y >= 0) && (y <= padded_height - 1))) { | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| bool MirrorPadGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMirrorPadGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMirrorPadGradOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, workspace, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -158,12 +161,12 @@ template <typename T> | |||
| void MirrorPadGradCPUKernel::MirrorPadGrad_Width_Height(const size_t size, const T *interim_dy, const int64_t dx_height, | |||
| const int64_t dx_width, const int64_t dy_height, | |||
| const int64_t dy_width, const int64_t padd_dim, | |||
| const int64_t *paddings_arg, int64_t mode, T *dx) { | |||
| const int64_t *paddings_arg, int64_t mode, T *dx) const { | |||
| int64_t paddings[MAX_PADDINGS * PADDING_SIZE]; // local and fixed size to keep in registers | |||
| for (int i = 0; i < MAX_PADDINGS * PADDING_SIZE; i++) { | |||
| paddings[i] = 0; // init all to 0 | |||
| } | |||
| extract_paddings_(paddings_arg, padd_dim, paddings); | |||
| extract_paddings(paddings_arg, padd_dim, paddings); | |||
| // Create required anchor points for non-mirrored data inside new tensor | |||
| int64_t ap1_x = paddings[WIDTH]; | |||
| int64_t ap2_x = paddings[WIDTH] + dx_width - 1; | |||
| @@ -216,7 +219,6 @@ void MirrorPadGradCPUKernel::MirrorPadGrad_Width_Height(const size_t size, const | |||
| } | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| template <typename T> | |||
| @@ -224,12 +226,12 @@ void MirrorPadGradCPUKernel::MirrorPadGradBatchChannel(const size_t size, T *dy, | |||
| const int64_t dx_batches, const int64_t dx_channels, | |||
| const int64_t dy_height, const int64_t dy_width, | |||
| const int64_t padd_dim, const int64_t *paddings_arg, | |||
| int64_t mode) { | |||
| int64_t mode) const { | |||
| int64_t paddings[MAX_PADDINGS * PADDING_SIZE]; // local and fixed size to keep in registers | |||
| for (int i = 0; i < MAX_PADDINGS * PADDING_SIZE; i++) { | |||
| paddings[i] = 0; // init all to 0 | |||
| } | |||
| extract_paddings_(paddings_arg, padd_dim, paddings); | |||
| extract_paddings(paddings_arg, padd_dim, paddings); | |||
| // Create anchor points for non mirrored data inside new tensor | |||
| int64_t ap1_channel = paddings[CHANNEL]; | |||
| int64_t ap2_channel = paddings[CHANNEL] + dx_channels - 1; | |||
| @@ -273,17 +275,16 @@ void MirrorPadGradCPUKernel::MirrorPadGradBatchChannel(const size_t size, T *dy, | |||
| } | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| template <typename T> | |||
| void MirrorPadGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| int64_t *paddings = reinterpret_cast<int64_t *>(inputs[1]->addr); | |||
| auto interim = reinterpret_cast<T *>(workspace[0]->addr); | |||
| auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const std::vector<AddressPtr> &outputs) const { | |||
| auto *inputs_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *paddings = reinterpret_cast<int64_t *>(inputs[1]->addr); | |||
| auto *interim = reinterpret_cast<T *>(workspace[0]->addr); | |||
| auto *outputs_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| MirrorPadGradBatchChannel(workspace_size_, inputs_addr, interim, output_shape_[0], output_shape_[1], input_shape_[2], | |||
| input_shape_[3], num_paddings_, paddings, mode_); | |||
| @@ -291,16 +292,5 @@ void MirrorPadGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, | |||
| MirrorPadGrad_Width_Height(output_size_, interim, output_shape_[2], output_shape_[3], input_shape_[2], | |||
| input_shape_[3], num_paddings_, paddings, mode_, outputs_addr); | |||
| } | |||
| void MirrorPadGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MirrorPadGradCPUKernel needs 2 inputs."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MirrorPadGradCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -16,13 +16,13 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MIRROR_PAD_GRAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class MirrorPadGradCPUKernel : public CPUKernel { | |||
| @@ -36,34 +36,33 @@ class MirrorPadGradCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void InitWorkspaceSize(); | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs); | |||
| const std::vector<AddressPtr> &outputs) const; | |||
| template <typename T> | |||
| void MirrorPadGrad_Width_Height(const size_t size, const T *interim_dy, const int64_t dx_height, | |||
| const int64_t dx_width, const int64_t dy_height, const int64_t dy_width, | |||
| const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode, T *dx); | |||
| const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode, T *dx) const; | |||
| template <typename T> | |||
| void MirrorPadGradBatchChannel(const size_t size, T *dy, T *interim_dy, const int64_t dx_batches, | |||
| const int64_t dx_channels, const int64_t dy_height, const int64_t dy_width, | |||
| const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode); | |||
| const int64_t padd_dim, const int64_t *paddings_arg, int64_t mode) const; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| TypeId dtype_{kTypeUnknown}; | |||
| size_t tensor_size_ = 1; | |||
| size_t shape_size_; | |||
| size_t output_size_ = 1; | |||
| size_t workspace_size_ = 1; | |||
| size_t tensor_size_{1}; | |||
| size_t shape_size_{1}; | |||
| size_t output_size_{1}; | |||
| size_t workspace_size_{1}; | |||
| int mode_{0}; | |||
| int64_t num_paddings_{0}; | |||
| std::vector<int64_t> input_shape_; | |||
| std::vector<int64_t> output_shape_; | |||
| int64_t mode_; | |||
| int64_t num_paddings_; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/pooling_avg_grad_cpu_kernel.h" | |||
| #include <string> | |||
| #include <utility> | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -25,7 +25,8 @@ namespace kernel { | |||
| constexpr size_t kPoolingMinDim = 4; | |||
| constexpr size_t kPoolingMaxDim = 5; | |||
| constexpr size_t kPoolingOffsetDim = 2; | |||
| constexpr size_t kPoolingInputsNum = 1; | |||
| constexpr size_t kPoolingOutputsNum = 1; | |||
| void PoolingCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| (void)workspace_size_list_.emplace_back(workspace_size_); | |||
| @@ -33,6 +34,7 @@ void PoolingCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| @@ -78,8 +80,7 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| dnnl::pooling_forward::desc desc = | |||
| dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_max, src_desc, dst_desc, | |||
| strides_dims, kernels_dims, padding_l, padding_r); | |||
| std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (kernel_name == prim::kPrimAvgPool->name() || kernel_name == prim::kPrimAvgPool3D->name()) { | |||
| if (kernel_name_ == prim::kPrimAvgPool->name()) { | |||
| desc = dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_avg, src_desc, | |||
| dst_desc, strides_dims, kernels_dims, padding_l, padding_r); | |||
| } | |||
| @@ -94,9 +95,8 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool PoolingCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kPoolingInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kPoolingOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WORKSPACE, workspace[0]->addr); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_POOLING_CPU_KERNEL_H_ | |||
| @@ -45,8 +46,6 @@ MS_REG_CPU_KERNEL(MaxPool3D, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOu | |||
| PoolingCPUKernel); | |||
| MS_REG_CPU_KERNEL(AvgPool, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| PoolingCPUKernel); | |||
| MS_REG_CPU_KERNEL(AvgPool3D, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| PoolingCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/pooling_max_grad_cpu_kernel.h" | |||
| #include <string> | |||
| #include <utility> | |||
| @@ -117,13 +118,13 @@ bool MaxPoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kMaxPoolingGradInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kMaxPoolingGradOutputsNum, kernel_name_); | |||
| auto input = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto diff = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto ret = memset_s(output, outputs[0]->size, 0, outputs[0]->size); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "Pooling grad memset error!"; | |||
| MS_LOG(EXCEPTION) << "Pooling grad memset error, ret value:" << ret << ", output address: " << output | |||
| << ", memset size: " << outputs[0]->size; | |||
| } | |||
| size_t src_wh = src_shape_[2] * src_shape_[3]; | |||
| size_t dst_wh = dst_shape_[2] * dst_shape_[3]; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h" | |||
| #include <algorithm> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| @@ -21,8 +22,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSoftmaxInputsNum = 1; | |||
| constexpr size_t kSoftmaxOutputsNum = 1; | |||
| } // namespace | |||
| void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<int> axis_list; | |||
| std::vector<int64_t> axis_list_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS); | |||
| @@ -48,9 +55,8 @@ void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool SoftmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Softmax error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSoftmaxInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSoftmaxOutputsNum, kernel_name_); | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/softmax_cross_entropy_with_logits_cpu_kernel.h" | |||
| #include <numeric> | |||
| #include <limits> | |||
| #include <functional> | |||
| #include <cmath> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| @@ -23,6 +25,12 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSoftmaxCrossEntropyWithLogitsInputsNum = 2; | |||
| constexpr size_t kSoftmaxCrossEntropyWithLogitsOutputsNum = 2; | |||
| constexpr size_t kSoftmaxCrossEntropyWithLogitsWorkspaceSize = 1; | |||
| } // namespace | |||
| void SoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| @@ -34,9 +42,10 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr | |||
| void SoftmaxCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| dnnl::memory::dims mem_dims; | |||
| mem_dims.insert(mem_dims.end(), shape.begin(), shape.end()); | |||
| (void)mem_dims.insert(mem_dims.end(), shape.begin(), shape.end()); | |||
| if (mem_dims.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "SoftmaxCrossEntropyWithLogits kernel dims invalid " << mem_dims.size(); | |||
| } | |||
| @@ -73,9 +82,10 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *log | |||
| bool SoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || workspace.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSoftmaxCrossEntropyWithLogitsInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSoftmaxCrossEntropyWithLogitsOutputsNum, kernel_name_); | |||
| CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSoftmaxCrossEntropyWithLogitsWorkspaceSize, kernel_name_); | |||
| size_t batch_float_size = batch_size_ * sizeof(float); | |||
| size_t batch_class_float_size = class_num_ * batch_float_size; | |||
| if (inputs[0]->size != workspace[0]->size || inputs[0]->size != batch_class_float_size || | |||
| @@ -88,10 +98,10 @@ bool SoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::Ad | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, workspace[0]->addr); | |||
| ExecutePrimitive(); | |||
| auto labels = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto logits = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto output1 = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto output2 = reinterpret_cast<float *>(outputs[1]->addr); | |||
| const auto *labels = reinterpret_cast<float *>(inputs[1]->addr); | |||
| const auto *logits = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto *output1 = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto *output2 = reinterpret_cast<float *>(outputs[1]->addr); | |||
| ForwardPostExecute(logits, labels, output1, output2); | |||
| return true; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h" | |||
| #include <numeric> | |||
| #include <limits> | |||
| #include <functional> | |||
| #include <cmath> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| @@ -23,6 +25,12 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kSparseSoftmaxCrossEntropyWithLogitsInputsNum = 2; | |||
| constexpr size_t kSparseSoftmaxCrossEntropyWithLogitsOutputsNum = 1; | |||
| constexpr size_t kSparseSoftmaxCrossEntropyWithLogitsWorkspaceSize = 1; | |||
| } // namespace | |||
| void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| CPUKernel::InitInputOutputSize(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| @@ -34,13 +42,14 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNo | |||
| void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> label_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| if (label_shape.size() > 1) { | |||
| MS_LOG(EXCEPTION) << "Labels shape length should be equal to Logits shape length minus 1"; | |||
| } | |||
| dnnl::memory::dims mem_dims; | |||
| mem_dims.insert(mem_dims.end(), shape.begin(), shape.end()); | |||
| (void)mem_dims.insert(mem_dims.end(), shape.begin(), shape.end()); | |||
| if (mem_dims.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "SparseSoftmaxCrossEntropyWithLogits kernel dims invalid " << mem_dims.size(); | |||
| } | |||
| @@ -66,7 +75,7 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int | |||
| float epsilon = std::numeric_limits<float>::min(); | |||
| for (size_t i = 0; i < batch_size_; ++i) { | |||
| if (labels[i] < 0) { | |||
| MS_LOG(EXCEPTION) << "Label value must >= 0!"; | |||
| MS_LOG(EXCEPTION) << "Label value must >= 0"; | |||
| } | |||
| size_t label = IntToSize(labels[i]); | |||
| if (label > class_num_) { | |||
| @@ -82,7 +91,7 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *la | |||
| size_t row_start = 0; | |||
| for (size_t i = 0; i < batch_size_; ++i) { | |||
| if (labels[i] < 0) { | |||
| MS_LOG(EXCEPTION) << "Label value must >= 0!"; | |||
| MS_LOG(EXCEPTION) << "Label value must >= 0"; | |||
| } | |||
| size_t label = IntToSize(labels[i]); | |||
| if (label > class_num_) { | |||
| @@ -103,9 +112,9 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *la | |||
| bool SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> &workspace, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || workspace.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Error input output size!"; | |||
| } | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseSoftmaxCrossEntropyWithLogitsInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseSoftmaxCrossEntropyWithLogitsOutputsNum, kernel_name_); | |||
| CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseSoftmaxCrossEntropyWithLogitsWorkspaceSize, kernel_name_); | |||
| size_t batch_float_size = batch_size_ * sizeof(float); | |||
| size_t batch_class_float_size = class_num_ * batch_float_size; | |||
| if (inputs[0]->size != workspace[0]->size || inputs[0]->size != batch_class_float_size || | |||
| @@ -120,9 +129,9 @@ bool SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kern | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, workspace[0]->addr); | |||
| ExecutePrimitive(); | |||
| auto labels = reinterpret_cast<int *>(inputs[1]->addr); | |||
| auto losses = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| const auto *labels = reinterpret_cast<int *>(inputs[1]->addr); | |||
| const auto *losses = reinterpret_cast<float *>(workspace[0]->addr); | |||
| auto *output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| if (is_grad_) { | |||
| GradPostExecute(labels, losses, output); | |||
| } else { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,6 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_ | |||
| @@ -32,10 +33,8 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| private: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| void ForwardPostExecute(const int *labels, const float *losses, float *output) const; | |||
| void GradPostExecute(const int *labels, const float *losses, float *output) const; | |||
| bool is_grad_{false}; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,13 +13,20 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/one_hot_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kOneHotInputsNum = 3; | |||
| constexpr size_t kOneHotOutputsNum = 1; | |||
| } // namespace | |||
| void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| if (output_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "Invalid output shape size: " << output_shape.size(); | |||
| @@ -28,6 +35,7 @@ void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| if (axis != -1 && LongToSize(axis) >= output_shape.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid axis: " << axis; | |||
| } | |||
| if (axis == -1) { | |||
| axis_ = output_shape.size() - 1; | |||
| } else { | |||
| @@ -42,13 +50,12 @@ void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool OneHotCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 3 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Input or output invalid!"; | |||
| } | |||
| auto indices = reinterpret_cast<int *>(inputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kOneHotInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOneHotOutputsNum, kernel_name_); | |||
| const auto *indices = reinterpret_cast<int *>(inputs[0]->addr); | |||
| auto on_value = reinterpret_cast<float *>(inputs[1]->addr)[0]; | |||
| auto off_value = reinterpret_cast<float *>(inputs[2]->addr)[0]; | |||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto *output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(int); | |||
| auto task = [this, &indices, &on_value, &off_value, &output](size_t start, size_t end) { | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ONE_HOT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ONE_HOT_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -33,9 +35,9 @@ class OneHotCPUKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| size_t depth_; | |||
| size_t stride_; | |||
| size_t axis_; | |||
| size_t depth_{0}; | |||
| size_t stride_{0}; | |||
| size_t axis_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(OneHot, KernelAttr(), OneHotCPUKernel); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -20,17 +20,16 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| PackCpuFwdKernel<T>::PackCpuFwdKernel() | |||
| : axis_(0), input_num_(1), output_size_(0), dims_behind_axis_(1), inputs_host_(nullptr) {} | |||
| namespace { | |||
| constexpr size_t kPackOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void PackCpuFwdKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_num_ = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS); | |||
| if (axis_ < 0) { | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| axis_ += (SizeToInt(input_shape.size()) + 1); | |||
| @@ -52,11 +51,9 @@ void PackCpuFwdKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| bool PackCpuFwdKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| if (!CheckParam(outputs)) { | |||
| return false; | |||
| } | |||
| auto output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num_, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kPackOutputsNum, kernel_name_); | |||
| auto *output = reinterpret_cast<T *>(outputs[0]->addr); | |||
| inputs_host_ = std::make_unique<T *[]>(input_num_); | |||
| for (size_t i = 0; i < inputs.size(); i++) { | |||
| inputs_host_[i] = reinterpret_cast<T *>(inputs[i]->addr); | |||
| @@ -90,16 +87,7 @@ bool PackCpuFwdKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const st | |||
| } | |||
| template <typename T> | |||
| bool PackCpuFwdKernel<T>::CheckParam(const std::vector<AddressPtr> &outputs) const { | |||
| if (outputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but PackGpuFwdKernel needs 1 output."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| template <typename T> | |||
| void PackCpuFwdKernel<T>::PackTensor(T *output, size_t start, size_t end) { | |||
| void PackCpuFwdKernel<T>::PackTensor(T *output, size_t start, size_t end) const { | |||
| for (size_t pos = start; pos < end; ++pos) { | |||
| size_t cur_input_index = pos / dims_behind_axis_ % input_num_; | |||
| size_t cycle_len = input_num_ * dims_behind_axis_; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,9 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_PACK_CPU_KERNEL_H | |||
| #define MINDSPORE_PACK_CPU_KERNEL_H | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PACK_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PACK_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| @@ -26,7 +27,7 @@ namespace kernel { | |||
| template <typename T> | |||
| class PackCpuFwdKernel : public CPUKernel { | |||
| public: | |||
| PackCpuFwdKernel(); | |||
| PackCpuFwdKernel() = default; | |||
| ~PackCpuFwdKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| @@ -34,14 +35,13 @@ class PackCpuFwdKernel : public CPUKernel { | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| bool CheckParam(const std::vector<AddressPtr> &outputs) const; | |||
| void PackTensor(T *output, size_t start, size_t end); | |||
| void PackTensor(T *output, size_t start, size_t end) const; | |||
| int axis_; | |||
| size_t input_num_; | |||
| size_t output_size_; | |||
| size_t dims_behind_axis_; | |||
| std::unique_ptr<T *[]> inputs_host_; | |||
| int axis_{0}; | |||
| size_t input_num_{1}; | |||
| size_t output_size_{0}; | |||
| size_t dims_behind_axis_{1}; | |||
| std::unique_ptr<T *[]> inputs_host_ { nullptr }; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(Stack, KernelAttr(), PackCpuFwdKernel, int8_t) | |||
| @@ -57,4 +57,4 @@ MS_REG_CPU_KERNEL_T(Stack, KernelAttr(), PackCpuFwdKernel, float) | |||
| MS_REG_CPU_KERNEL_T(Stack, KernelAttr(), PackCpuFwdKernel, bool) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_PACK_CPU_KERNEL_H | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PACK_CPU_KERNEL_H_ | |||
| @@ -19,7 +19,14 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kPadInputsNum = 1; | |||
| constexpr size_t kPadOutputsNum = 1; | |||
| } // namespace | |||
| void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| paddings_ = AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "paddings"); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| @@ -27,12 +34,10 @@ void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| shape_size_ = input_shape.size(); | |||
| if (shape_size_ == 4) { // shape adjustment from 2d/3d to 4d | |||
| } else if (shape_size_ == 3) { | |||
| auto it = input_shape.begin(); | |||
| input_shape.insert(it, 1); // batch padding | |||
| (void)input_shape.insert(input_shape.begin(), 1); // batch padding | |||
| shape_size_ = 4; | |||
| } else if (shape_size_ == 2) { | |||
| auto it = input_shape.begin(); | |||
| input_shape.insert(it, 2, 1); // channel padding | |||
| (void)input_shape.insert(input_shape.begin(), 2, 1); // channel padding | |||
| shape_size_ = 4; | |||
| } | |||
| @@ -43,11 +48,9 @@ void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| if (paddings_.size() == 4) { // shape adjustment from 2d/3d to 4d | |||
| } else if (paddings_.size() == 3) { | |||
| auto it = paddings_.begin(); | |||
| paddings_.insert(it, 1, {0, 0}); // batch padding | |||
| (void)paddings_.insert(paddings_.begin(), 1, {0, 0}); // batch padding | |||
| } else if (paddings_.size() == 2) { | |||
| auto it = paddings_.begin(); | |||
| paddings_.insert(it, 2, {0, 0}); // channel padding | |||
| (void)paddings_.insert(paddings_.begin(), 2, {0, 0}); // channel padding | |||
| } | |||
| for (size_t i = 0; i < shape_size_; i++) { | |||
| @@ -59,6 +62,8 @@ void PadCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| bool PadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kPadInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kPadOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeFloat16) { | |||
| LaunchKernel<float16>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -74,9 +79,9 @@ bool PadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const s | |||
| } | |||
| template <typename T> | |||
| void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto outputs_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const { | |||
| const auto *inputs_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *outputs_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| const int pad_left = paddings_[3][0]; | |||
| const int pad_top = paddings_[2][0]; | |||
| @@ -112,16 +117,5 @@ void PadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std | |||
| } | |||
| } | |||
| } | |||
| void PadCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but PadCPUKernel needs 1 input."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but PadCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_PAD_CPU_KERNEL_H_ | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| @@ -34,16 +35,15 @@ class PadCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| template <typename T> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<std::vector<int64_t>> paddings_; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| uint64_t tensor_size_ = 1; | |||
| size_t shape_size_ = 1; | |||
| uint64_t output_size_ = 1; | |||
| uint64_t tensor_size_{1}; | |||
| size_t shape_size_{1}; | |||
| uint64_t output_size_{1}; | |||
| std::vector<std::vector<int64_t>> paddings_; | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| }; | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,18 +13,27 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/range_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kRangeInputsNum = 3; | |||
| constexpr size_t kRangeOutputsNum = 1; | |||
| } // namespace | |||
| void RangeCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); | |||
| } | |||
| bool RangeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kRangeInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kRangeOutputsNum, kernel_name_); | |||
| if (dtype_ == kNumberTypeInt32) { | |||
| return LaunchKernel<int32_t>(inputs, outputs); | |||
| } else if (dtype_ == kNumberTypeFloat32) { | |||
| @@ -35,19 +44,19 @@ bool RangeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const | |||
| } | |||
| template <typename T> | |||
| bool RangeCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { | |||
| T start_ = reinterpret_cast<T *>(inputs[0]->addr)[0]; | |||
| T limit_ = reinterpret_cast<T *>(inputs[1]->addr)[0]; | |||
| T delta_ = reinterpret_cast<T *>(inputs[2]->addr)[0]; | |||
| bool RangeCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const { | |||
| auto start = reinterpret_cast<T *>(inputs[0]->addr)[0]; | |||
| auto limit = reinterpret_cast<T *>(inputs[1]->addr)[0]; | |||
| auto delta = reinterpret_cast<T *>(inputs[2]->addr)[0]; | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| size_t elem_num = outputs[0]->size / sizeof(T); | |||
| for (size_t i = 0; i < elem_num; i++) { | |||
| T val_ = start_ + static_cast<T>(i) * delta_; | |||
| if (val_ > limit_) { | |||
| T val = start + static_cast<T>(i) * delta; | |||
| if (val > limit) { | |||
| break; | |||
| } | |||
| output_addr[i] = val_; | |||
| output_addr[i] = val; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANGE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANGE_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| @@ -31,10 +33,11 @@ class RangeCPUKernel : public CPUKernel { | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| template <typename T> | |||
| bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||
| private: | |||
| template <typename T> | |||
| bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| }; | |||
| @@ -23,13 +23,23 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr size_t kReduceSmallVectorSize = 200000; | |||
| constexpr size_t kReduceInputsNum = 1; | |||
| constexpr size_t kReduceOutputsNum = 1; | |||
| } // namespace | |||
| template <typename T> | |||
| void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel_name_ = AnfAlgo::GetCNodeName(kernel_node); | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS); | |||
| auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| auto axis_addr = prim->GetAttr(AXIS); | |||
| if (axis_addr == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Miss attribute " << AXIS; | |||
| } | |||
| if (axis_addr->isa<ValueTuple>() || axis_addr->isa<ValueList>()) { | |||
| axis_ = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, AXIS); | |||
| } else if (axis_addr->isa<Int64Imm>()) { | |||
| @@ -39,8 +49,8 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| } | |||
| int dimension = input_shape_.size(); | |||
| std::transform(axis_.begin(), axis_.end(), axis_.begin(), | |||
| [dimension](const auto &a) { return a < 0 ? dimension + a : a; }); | |||
| (void)std::transform(axis_.begin(), axis_.end(), axis_.begin(), | |||
| [dimension](const auto &a) { return a < 0 ? dimension + a : a; }); | |||
| sort(axis_.begin(), axis_.end()); | |||
| // Delete the duplicate axis. | |||
| auto last = std::unique(axis_.begin(), axis_.end()); | |||
| @@ -48,30 +58,30 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if constexpr (std::is_same<T, bool>::value) { | |||
| if (kernel_name == "ReduceAll") { | |||
| if (kernel_name_ == prim::kPrimReduceAll->name()) { | |||
| reduce_type_ = kReduceAll; | |||
| reduce_func_ = [](const T *input, size_t pos, T *out) { *out &= input[pos]; }; | |||
| } else if (kernel_name == "ReduceAny") { | |||
| } else if (kernel_name_ == prim::kPrimReduceAny->name()) { | |||
| reduce_type_ = kReduceAny; | |||
| reduce_func_ = [](const T *input, size_t pos, T *out) { *out |= input[pos]; }; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << fullname_ << " for bool."; | |||
| MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name_ << " for bool."; | |||
| } | |||
| } else { | |||
| if (kernel_name == "ReduceMax") { | |||
| if (kernel_name_ == prim::kPrimReduceMax->name()) { | |||
| reduce_type_ = kReduceMax; | |||
| reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::max(input[pos], *out); }; | |||
| } else if (kernel_name == "ReduceMin") { | |||
| } else if (kernel_name_ == prim::kPrimReduceMin->name()) { | |||
| reduce_type_ = kReduceMin; | |||
| reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::min(input[pos], *out); }; | |||
| } else if (kernel_name == "ReduceSum") { | |||
| } else if (kernel_name_ == prim::kPrimReduceSum->name()) { | |||
| reduce_type_ = kReduceSum; | |||
| reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; | |||
| } else if (kernel_name == "ReduceMean") { | |||
| } else if (kernel_name_ == prim::kPrimReduceMean->name()) { | |||
| reduce_type_ = kReduceMean; | |||
| reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name; | |||
| MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name_; | |||
| } | |||
| } | |||
| @@ -87,13 +97,11 @@ void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||
| template <typename T> | |||
| bool ReduceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReduceInputsNum, kernel_name_); | |||
| CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReduceOutputsNum, kernel_name_); | |||
| size_t input_size = inputs[0]->size / sizeof(T); | |||
| if (input_size == 0) { | |||
| MS_LOG(EXCEPTION) << "Input data size is 0."; | |||
| } | |||
| auto input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr); | |||
| auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr); | |||
| if (axis_.empty() || input_shape_.empty() || input_shape_.size() == 1) { | |||
| if (input_size < kReduceSmallVectorSize) { | |||
| // Get one ret | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,8 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <string> | |||