| @@ -81,7 +81,6 @@ include_directories(${CCSRC_DIR}) | |||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}) | |||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/runtime/kernel/arm) | |||
| include_directories(${TOP_DIR}/third_party) | |||
| include_directories(${TOP_DIR}/third_party/flatbuffers/include) | |||
| include_directories(${CMAKE_BINARY_DIR}) | |||
| include(${TOP_DIR}/cmake/utils.cmake) | |||
| @@ -27,16 +27,6 @@ | |||
| namespace mindspore { | |||
| namespace session { | |||
| /// \brief CallBackParam defined input arguments for callBack function. | |||
| struct CallBackParam { | |||
| std::string node_name; /**< node name argument */ | |||
| std::string node_type; /**< node type argument */ | |||
| }; | |||
| /// \brief KernelCallBack defined the function pointer for callBack. | |||
| using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs, | |||
| std::vector<tensor::MSTensor *> outputs, const CallBackParam &opInfo)>; | |||
| /// \brief LiteSession defined session in MindSpore Lite for compiling Model and forwarding model. | |||
| class MS_API LiteSession { | |||
| public: | |||
| @@ -17,9 +17,11 @@ | |||
| #ifndef MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_ | |||
| #define MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_ | |||
| #include <functional> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "ir/dtype/type_id.h" | |||
| namespace mindspore { | |||
| @@ -74,5 +76,14 @@ class MS_API MSTensor { | |||
| virtual void *MutableData() = 0; | |||
| }; | |||
| } // namespace tensor | |||
| /// \brief CallBackParam defined input arguments for callBack function. | |||
| struct CallBackParam { | |||
| std::string node_name; /**< node name argument */ | |||
| std::string node_type; /**< node type argument */ | |||
| }; | |||
| /// \brief KernelCallBack defined the function pointer for callBack. | |||
| using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs, | |||
| std::vector<tensor::MSTensor *> outputs, const CallBackParam &opInfo)>; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_ | |||
| @@ -29,6 +29,7 @@ set(LITE_SRC | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel_registry.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/populate_parameter.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include <stdlib.h> | |||
| #include "src/common/file_utils.h" | |||
| #include <fcntl.h> | |||
| #include <stdlib.h> | |||
| #include <climits> | |||
| #include <cmath> | |||
| #include "src/common/file_utils.h" | |||
| #include "securec/include/securec.h" | |||
| namespace mindspore { | |||
| @@ -78,7 +78,7 @@ std::string RealPath(const char *path) { | |||
| char *real_path = realpath(path, resolvedPath.get()); | |||
| #endif | |||
| if (real_path == nullptr || strlen(real_path) == 0) { | |||
| MS_LOG(ERROR) << "Proto file path is not valid"; | |||
| MS_LOG(ERROR) << "file path is not valid : " << path; | |||
| return ""; | |||
| } | |||
| std::string res = resolvedPath.get(); | |||
| @@ -19,10 +19,7 @@ | |||
| #include "include/errorcode.h" | |||
| namespace mindspore::lite { | |||
| int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors, | |||
| std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, | |||
| const session::KernelCallBack &before, const session::KernelCallBack &after) { | |||
| MS_ASSERT(nullptr != allocator); | |||
| int Executor::CheckInputs(std::vector<Tensor *> &in_tensors) { | |||
| for (auto &inTensor : in_tensors) { | |||
| if (inTensor == nullptr) { | |||
| MS_LOG(ERROR) << "Graph input tensor is nullptr"; | |||
| @@ -32,10 +29,18 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_ | |||
| MS_LOG(ERROR) << "Graph input tensor data is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| if (inTensor->GetFormat() != schema::Format::Format_NHWC) { | |||
| MS_LOG(ERROR) << "Model input tensor should be NHWC"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors, | |||
| std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, const KernelCallBack &before, | |||
| const KernelCallBack &after) { | |||
| MS_ASSERT(nullptr != allocator); | |||
| auto ret = this->CheckInputs(in_tensors); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "CheckInputs failed"; | |||
| return ret; | |||
| } | |||
| kernel::LiteKernelUtil::InitTensorRefCount(kernels); | |||
| for (auto out_tensor : out_tensors) { // increase RefCount of output tensors, such that Run will not free them | |||
| @@ -44,34 +49,20 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_ | |||
| for (auto *kernel : kernels) { | |||
| MS_ASSERT(nullptr != kernel); | |||
| if (before != nullptr) { | |||
| if (!before(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), | |||
| {kernel->name(), kernel->type_str()})) { | |||
| MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name(); | |||
| } | |||
| ret = kernel->PreProcess(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name(); | |||
| return ret; | |||
| } | |||
| auto ret = kernel->Run(); | |||
| if (0 != ret) { | |||
| ret = kernel->Run(before, after); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); | |||
| return ret; | |||
| } | |||
| if (after != nullptr) { | |||
| if (!after(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), | |||
| {kernel->name(), kernel->type_str()})) { | |||
| MS_LOG(ERROR) << "run kernel after_callback failed, name: " << kernel->name(); | |||
| } | |||
| } | |||
| for (auto input_kernel : kernel->in_kernels()) { | |||
| MS_ASSERT(input_kernel != nullptr); | |||
| if (input_kernel->is_model_output()) { | |||
| continue; | |||
| } | |||
| ret = input_kernel->DecOutTensorRefCount(); | |||
| if (0 != ret) { | |||
| MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed"; | |||
| } | |||
| ret = kernel->PostProcess(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name(); | |||
| return ret; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| @@ -99,9 +90,9 @@ int Executor::TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_forma | |||
| MS_ASSERT(4 == tensor->shape().size()); | |||
| auto src_format = tensor->GetFormat(); | |||
| if (src_format == schema::Format::Format_NC4HW4 && dst_format == schema::Format::Format_NHWC) { | |||
| auto *src_data = tensor->MutableData(); | |||
| auto *src_data = tensor->data_c(); | |||
| if (src_data == nullptr) { | |||
| MS_LOG(ERROR) << "MutableData return nullptr"; | |||
| MS_LOG(ERROR) << "data of tensor is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *dst_data = allocator->Malloc(tensor->Size()); | |||
| @@ -28,13 +28,15 @@ class Executor { | |||
| Executor() = default; | |||
| virtual ~Executor() = default; | |||
| virtual int Prepare(std::vector<kernel::LiteKernel *> &kernels) { return 0; } | |||
| virtual int Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return 0; } | |||
| virtual int Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors, | |||
| std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr, | |||
| const session::KernelCallBack &before = nullptr, const session::KernelCallBack &after = nullptr); | |||
| const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr); | |||
| protected: | |||
| int CheckInputs(std::vector<Tensor *> &in_tensors); | |||
| int TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr); | |||
| int TransformTensorLayoutUint8(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr); | |||
| @@ -19,12 +19,21 @@ | |||
| #include "src/common/log_adapter.h" | |||
| namespace mindspore::lite { | |||
| InnerContext::InnerContext(const Context *context) { | |||
| this->allocator = context->allocator; | |||
| this->thread_num_ = context->thread_num_; | |||
| this->device_list_.clear(); | |||
| for (auto &device_ctx : context->device_list_) { | |||
| this->device_list_.push_back(device_ctx); | |||
| } | |||
| } | |||
| int InnerContext::Init() { | |||
| if (this->device_list_.empty()) { | |||
| MS_LOG(ERROR) << "Device list is empty."; | |||
| if (RET_OK != this->IsValid()) { | |||
| MS_LOG(ERROR) << "Context is not valid"; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| if (this->thread_pool_ == nullptr && this->device_list_[0].device_type_ == DT_CPU) { | |||
| if (this->thread_pool_ == nullptr && this->IsCpuEnabled()) { | |||
| this->thread_pool_ = | |||
| CreateLiteThreadPool(this->thread_num_, this->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_); | |||
| if (this->thread_pool_ == nullptr) { | |||
| @@ -49,4 +58,74 @@ InnerContext::~InnerContext() { | |||
| this->thread_pool_ = NULL; | |||
| } | |||
| } | |||
| int InnerContext::IsValid() { | |||
| if (this->device_list_.empty()) { | |||
| MS_LOG(ERROR) << "Device list is empty."; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| #ifndef SUPPORT_GPU | |||
| if (IsGpuEnabled()) { | |||
| MS_LOG(ERROR) << "GPU is not supported."; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| #endif | |||
| if (IsNpuEnabled()) { | |||
| MS_LOG(ERROR) << "NPU is not supported."; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| bool InnerContext::IsCpuFloat16Enabled() { | |||
| if (!IsCpuEnabled()) { | |||
| return false; | |||
| } | |||
| return GetCpuInfo().enable_float16_; | |||
| } | |||
| bool InnerContext::IsGpuFloat16Enabled() { | |||
| if (!IsGpuEnabled()) { | |||
| return false; | |||
| } | |||
| return GetGpuInfo().enable_float16_; | |||
| } | |||
| bool InnerContext::IsCpuEnabled() { | |||
| return this->device_list_.end() != | |||
| std::find_if(this->device_list_.begin(), this->device_list_.end(), | |||
| [](const DeviceContext &device) { return device.device_type_ == DT_CPU; }); | |||
| } | |||
| bool InnerContext::IsGpuEnabled() { | |||
| return this->device_list_.end() != | |||
| std::find_if(this->device_list_.begin(), this->device_list_.end(), | |||
| [](const DeviceContext &device) { return device.device_type_ == DT_GPU; }); | |||
| } | |||
| bool InnerContext::IsNpuEnabled() { | |||
| return this->device_list_.end() != | |||
| std::find_if(this->device_list_.begin(), this->device_list_.end(), | |||
| [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }); | |||
| } | |||
| CpuDeviceInfo InnerContext::GetCpuInfo() { | |||
| auto iter = std::find_if(this->device_list_.begin(), this->device_list_.end(), | |||
| [](const DeviceContext &device) { return device.device_type_ == DT_CPU; }); | |||
| if (iter == this->device_list_.end()) { | |||
| return {}; | |||
| } else { | |||
| return iter->device_info_.cpu_device_info_; | |||
| } | |||
| } | |||
| GpuDeviceInfo InnerContext::GetGpuInfo() { | |||
| auto iter = std::find_if(this->device_list_.begin(), this->device_list_.end(), | |||
| [](const DeviceContext &device) { return device.device_type_ == DT_GPU; }); | |||
| if (iter == this->device_list_.end()) { | |||
| return {}; | |||
| } else { | |||
| return iter->device_info_.gpu_device_info_; | |||
| } | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -27,8 +27,28 @@ struct InnerContext : public Context { | |||
| struct ThreadPool *thread_pool_ = nullptr; | |||
| public: | |||
| InnerContext() = default; | |||
| explicit InnerContext(const Context *context); | |||
| int Init(); | |||
| bool IsCpuFloat16Enabled(); | |||
| bool IsGpuFloat16Enabled(); | |||
| bool IsCpuEnabled(); | |||
| bool IsGpuEnabled(); | |||
| bool IsNpuEnabled(); | |||
| CpuDeviceInfo GetCpuInfo(); | |||
| GpuDeviceInfo GetGpuInfo(); | |||
| int IsValid(); | |||
| virtual ~InnerContext(); | |||
| }; | |||
| } // namespace mindspore::lite | |||
| @@ -117,8 +117,9 @@ kernel::LiteKernel *KernelRegistry::GetKernel(const std::vector<Tensor *> &in_te | |||
| if (creator != nullptr) { | |||
| auto kernel = creator(in_tensors, out_tensors, parameter, ctx, key, primitive); | |||
| if (kernel != nullptr) { | |||
| return kernel; | |||
| kernel->set_desc(key); | |||
| } | |||
| return kernel; | |||
| } | |||
| return nullptr; | |||
| } | |||
| @@ -16,8 +16,11 @@ | |||
| #include "src/lite_kernel.h" | |||
| #include <algorithm> | |||
| #include "src/tensor.h" | |||
| namespace mindspore::kernel { | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| void *LiteKernel::workspace_ = nullptr; | |||
| @@ -54,7 +57,21 @@ int LiteKernel::DecOutTensorRefCount() { | |||
| return 0; | |||
| } | |||
| int LiteKernel::Prepare() { | |||
| int LiteKernel::FreeWorkTensor() const { | |||
| for (auto input_kernel : this->in_kernels()) { | |||
| MS_ASSERT(input_kernel != nullptr); | |||
| if (input_kernel->is_model_output()) { | |||
| continue; | |||
| } | |||
| auto ret = input_kernel->DecOutTensorRefCount(); | |||
| if (0 != ret) { | |||
| MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << this->name() << " failed"; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int LiteKernel::PreProcess() { | |||
| if (!InferShapeDone()) { | |||
| (const_cast<mindspore::lite::PrimitiveC *>(primitive_))->SetInferFlag(true); | |||
| auto ret = (const_cast<mindspore::lite::PrimitiveC *>(primitive_))->InferShape(in_tensors_, out_tensors_); | |||
| @@ -70,7 +87,7 @@ int LiteKernel::Prepare() { | |||
| } | |||
| } | |||
| auto &outputs = this->out_tensors(); | |||
| auto outputs = this->out_tensors(); | |||
| for (auto *output : outputs) { | |||
| MS_ASSERT(output != nullptr); | |||
| output->MallocData(); | |||
| @@ -78,6 +95,50 @@ int LiteKernel::Prepare() { | |||
| return RET_OK; | |||
| } | |||
| int LiteKernel::Run(const KernelCallBack &before, const KernelCallBack &after) { | |||
| if (before != nullptr) { | |||
| if (!before(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_), | |||
| {this->name_, this->type_str()})) { | |||
| MS_LOG(WARNING) << "run kernel before_callback failed, name: " << this->name_; | |||
| } | |||
| } | |||
| auto ret = Run(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "run kernel failed, name: " << this->name_; | |||
| return ret; | |||
| } | |||
| if (after != nullptr) { | |||
| if (!after(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_), | |||
| {this->name_, this->type_str()})) { | |||
| MS_LOG(ERROR) << "run kernel after_callback failed, name: " << this->name_; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| std::string LiteKernel::ToString() const { | |||
| std::ostringstream oss; | |||
| oss << "LiteKernel: " << this->name_; | |||
| oss << ", Type: " << this->type_str(); | |||
| oss << std::endl << this->in_tensors_.size() << " InputTensors:"; | |||
| for (auto tensor : in_tensors_) { | |||
| oss << " " << tensor << ":" << tensor->ToString(); | |||
| } | |||
| oss << std::endl << this->out_tensors_.size() << " OutputTensors:"; | |||
| for (auto tensor : out_tensors_) { | |||
| oss << " " << tensor << ":" << tensor->ToString(); | |||
| } | |||
| oss << std::endl << this->in_kernels_.size() << " InputKernels:"; | |||
| for (auto in_kernel : in_kernels_) { | |||
| oss << " " << in_kernel->name_; | |||
| } | |||
| oss << std::endl << this->out_kernels_.size() << " OutputKernels:"; | |||
| for (auto out_kernel : out_kernels_) { | |||
| oss << " " << out_kernel->name_; | |||
| } | |||
| return oss.str(); | |||
| } | |||
| std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputKernels( | |||
| const std::vector<kernel::LiteKernel *> &kernels) { | |||
| std::vector<kernel::LiteKernel *> input_kernels; | |||
| @@ -87,10 +148,11 @@ std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputKernels( | |||
| continue; | |||
| } | |||
| for (const auto &input : kernel->in_kernels()) { | |||
| auto iter = std::find(kernels.begin(), kernels.end(), input); | |||
| auto item = std::find(input_kernels.begin(), input_kernels.end(), kernel); | |||
| if (iter == kernels.end() && item == input_kernels.end()) { | |||
| auto in_kernel_in_graph = std::find(kernels.begin(), kernels.end(), input); | |||
| auto in_kernel_in_ret = std::find(input_kernels.begin(), input_kernels.end(), kernel); | |||
| if (in_kernel_in_graph == kernels.end() && in_kernel_in_ret == input_kernels.end()) { | |||
| input_kernels.emplace_back(kernel); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| @@ -106,10 +168,11 @@ std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphOutputKernels( | |||
| continue; | |||
| } | |||
| for (const auto &output : kernel->out_kernels()) { | |||
| auto iter = std::find(kernels.begin(), kernels.end(), output); | |||
| auto item = std::find(output_kernels.begin(), output_kernels.end(), kernel); | |||
| if (iter == kernels.end() && item == output_kernels.end()) { | |||
| auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output); | |||
| auto out_kernel_in_ret = std::find(output_kernels.begin(), output_kernels.end(), kernel); | |||
| if (out_kernel_in_graph == kernels.end() && out_kernel_in_ret == output_kernels.end()) { | |||
| output_kernels.emplace_back(kernel); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| @@ -120,7 +183,8 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphInputTensors(const std::vect | |||
| std::vector<lite::Tensor *> input_tensors; | |||
| std::vector<lite::Tensor *> all_output_tensors; | |||
| for (const auto &kernel : kernels) { | |||
| all_output_tensors.insert(all_output_tensors.end(), kernel->out_tensors().begin(), kernel->out_tensors().end()); | |||
| auto kernel_out_tensors = kernel->out_tensors(); | |||
| all_output_tensors.insert(all_output_tensors.end(), kernel_out_tensors.begin(), kernel_out_tensors.end()); | |||
| } | |||
| std::vector<kernel::LiteKernel *> input_kernels = SubgraphInputKernels(kernels); | |||
| for (const auto &kernel : input_kernels) { | |||
| @@ -139,7 +203,8 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec | |||
| std::vector<lite::Tensor *> output_tensors; | |||
| std::vector<lite::Tensor *> all_input_tensors; | |||
| for (const auto &kernel : kernels) { | |||
| all_input_tensors.insert(all_input_tensors.end(), kernel->in_tensors().begin(), kernel->in_tensors().end()); | |||
| auto kernel_in_tensors = kernel->in_tensors(); | |||
| all_input_tensors.insert(all_input_tensors.end(), kernel_in_tensors.begin(), kernel_in_tensors.end()); | |||
| } | |||
| std::vector<kernel::LiteKernel *> output_kernels = SubgraphOutputKernels(kernels); | |||
| for (const auto &kernel : output_kernels) { | |||
| @@ -153,8 +218,12 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec | |||
| return output_tensors; | |||
| } | |||
| void LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> &kernels) { | |||
| void LiteKernelUtil::InitIOKernels(std::vector<kernel::LiteKernel *> &kernels) { | |||
| for (auto *kernel : kernels) { | |||
| // clean io kernels | |||
| kernel->SetInKernel({}); | |||
| kernel->SetOutKernel({}); | |||
| // find io kernels | |||
| for (auto *search_kernel : kernels) { | |||
| if (search_kernel == kernel) { | |||
| continue; | |||
| @@ -19,6 +19,7 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/common/utils.h" | |||
| #ifdef ENABLE_ARM | |||
| @@ -32,9 +33,7 @@ | |||
| static constexpr int kPerTensor = 1; | |||
| namespace mindspore::kernel { | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; | |||
| enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; | |||
| struct KernelKey { | |||
| KERNEL_ARCH arch; | |||
| TypeId data_type; | |||
| @@ -51,16 +50,17 @@ struct KernelKey { | |||
| } | |||
| }; | |||
| enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSubGraph, kNpuSubGraph, kApuSubGraph }; | |||
| class LiteKernel { | |||
| public: | |||
| LiteKernel() = default; | |||
| // parameter should be deleted or freed by caller, and should be deleted or freed after LiteKernel is deleted | |||
| LiteKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &in_tensors, | |||
| const std::vector<lite::Tensor *> &out_tensors, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors, | |||
| const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) | |||
| : op_parameter_(parameter), | |||
| in_tensors_(in_tensors), | |||
| out_tensors_(out_tensors), | |||
| in_tensors_(std::move(in_tensors)), | |||
| out_tensors_(std::move(out_tensors)), | |||
| primitive_(primitive), | |||
| context_(ctx) { | |||
| if (op_parameter_ != nullptr && ctx != nullptr) { | |||
| @@ -77,15 +77,22 @@ class LiteKernel { | |||
| } | |||
| } | |||
| virtual int Prepare(); | |||
| // called while compiling graph | |||
| virtual int Prepare() { return mindspore::lite::RET_OK; } | |||
| // called before Run | |||
| virtual int PreProcess(); | |||
| virtual int Run() { return mindspore::lite::RET_ERROR; } | |||
| virtual int Init() { return -1; } | |||
| virtual int Run(const KernelCallBack &before, const KernelCallBack &after); | |||
| // called after Run | |||
| virtual int PostProcess() { return FreeWorkTensor(); } | |||
| virtual int ReSize() { return -1; } | |||
| virtual int ReSize() { return mindspore::lite::RET_ERROR; } | |||
| virtual int Run() { return -1; } | |||
| virtual int Init() { return mindspore::lite::RET_ERROR; } | |||
| std::string name() { return this->name_; } | |||
| std::string name() const { return this->name_; } | |||
| virtual void train() { train_mode_ = true; } | |||
| @@ -101,20 +108,20 @@ class LiteKernel { | |||
| bool is_model_output() const { return this->is_model_output_; } | |||
| schema::PrimitiveType Type() { | |||
| schema::PrimitiveType Type() const { | |||
| return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_) | |||
| : schema::PrimitiveType_NONE; | |||
| } | |||
| std::string type_str() { return schema::EnumNamePrimitiveType(this->Type()); } | |||
| std::string type_str() const { return schema::EnumNamePrimitiveType(this->Type()); } | |||
| void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; } | |||
| void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; } | |||
| std::vector<lite::Tensor *> &in_tensors() { return this->in_tensors_; } | |||
| std::vector<lite::Tensor *> in_tensors() const { return this->in_tensors_; } | |||
| std::vector<lite::Tensor *> &out_tensors() { return this->out_tensors_; } | |||
| std::vector<lite::Tensor *> out_tensors() const { return this->out_tensors_; } | |||
| void AddInKernel(LiteKernel *kernel) { | |||
| if (!lite::IsContain(this->in_kernels_, kernel)) { | |||
| @@ -132,14 +139,16 @@ class LiteKernel { | |||
| void SetOutKernel(const std::vector<LiteKernel *> &kernel) { this->out_kernels_ = kernel; } | |||
| std::vector<LiteKernel *> &in_kernels() { return this->in_kernels_; } | |||
| std::vector<LiteKernel *> in_kernels() const { return this->in_kernels_; } | |||
| std::vector<LiteKernel *> &out_kernels() { return this->out_kernels_; } | |||
| std::vector<LiteKernel *> out_kernels() const { return this->out_kernels_; } | |||
| void InitOutTensorRefCount(); | |||
| int DecOutTensorRefCount(); | |||
| int FreeWorkTensor() const; | |||
| KernelKey desc() const { return desc_; } | |||
| void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; } | |||
| @@ -151,10 +160,14 @@ class LiteKernel { | |||
| static void FreeWorkspace(); | |||
| void *GetWorkspace() { return workspace_; } | |||
| SubGraphType subgraph_type() const { return this->subgraph_type_; } | |||
| virtual std::string ToString() const; | |||
| protected: | |||
| bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()) && true; } | |||
| bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()); } | |||
| KernelKey desc_; | |||
| KernelKey desc_{}; | |||
| std::string name_; | |||
| OpParameter *op_parameter_ = nullptr; | |||
| // tensor will free in ~lite_session() | |||
| @@ -168,27 +181,7 @@ class LiteKernel { | |||
| bool is_model_output_ = false; | |||
| size_t workspace_size_ = 0; | |||
| static void *workspace_; | |||
| }; | |||
| class SubGraphKernel : public LiteKernel { | |||
| public: | |||
| explicit SubGraphKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<kernel::LiteKernel *> &in_kernels, | |||
| const std::vector<kernel::LiteKernel *> &out_kernels, | |||
| const std::vector<kernel::LiteKernel *> &nodes, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(nullptr, inputs, outputs, ctx, primitive), nodes_(nodes) { | |||
| in_kernels_ = in_kernels; | |||
| out_kernels_ = out_kernels; | |||
| } | |||
| virtual int Init() { return -1; } | |||
| virtual int InferShape() { return -1; } | |||
| virtual int ReSize() { return -1; } | |||
| virtual int Run() { return -1; } | |||
| protected: | |||
| std::vector<LiteKernel *> nodes_; | |||
| SubGraphType subgraph_type_ = kNotSubGraph; | |||
| }; | |||
| typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -198,7 +191,7 @@ typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs, | |||
| class LiteKernelUtil { | |||
| public: | |||
| static void TopologicalSortKernels(std::vector<kernel::LiteKernel *> &kernels); | |||
| static void InitIOKernels(std::vector<kernel::LiteKernel *> &kernels); | |||
| static std::vector<kernel::LiteKernel *> SubgraphInputKernels(const std::vector<kernel::LiteKernel *> &kernels); | |||
| @@ -295,13 +295,13 @@ int LiteSession::CompileGraph(Model *model) { | |||
| std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const { return this->input_vec_; } | |||
| int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) { | |||
| int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) { | |||
| bool expected = false; | |||
| if (!is_running_.compare_exchange_strong(expected, true)) { | |||
| MS_LOG(ERROR) << "Not support multi-threading"; | |||
| return RET_ERROR; | |||
| } | |||
| STATUS ret = RET_ERROR; | |||
| STATUS ret; | |||
| MS_ASSERT(this->context_); | |||
| if (before == nullptr && after == nullptr) { | |||
| ret = executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get()); | |||
| @@ -325,39 +325,12 @@ int LiteSession::Init(Context *context) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| if (context->device_list_.empty()) { | |||
| MS_LOG(ERROR) << "Device list is empty."; | |||
| is_running_.store(false); | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| auto &device_type = context->device_list_[0].device_type_; | |||
| if (device_type == DT_NPU) { | |||
| MS_LOG(ERROR) << "NPU is not supported."; | |||
| is_running_.store(false); | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| #ifndef SUPPORT_GPU | |||
| if (device_type == DT_GPU) { | |||
| MS_LOG(ERROR) << "GPU is not supported."; | |||
| is_running_.store(false); | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| #endif | |||
| this->context_ = new (std::nothrow) InnerContext(); | |||
| this->context_ = new (std::nothrow) InnerContext(context); | |||
| if (this->context_ == nullptr) { | |||
| MS_LOG(ERROR) << "New Context failed"; | |||
| is_running_.store(false); | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| this->context_->allocator = context->allocator; | |||
| this->context_->thread_num_ = context->thread_num_; | |||
| this->context_->device_list_.clear(); | |||
| for (auto &device_ctx : context->device_list_) { | |||
| this->context_->device_list_.push_back(device_ctx); | |||
| } | |||
| auto ret = this->context_->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init Context failed"; | |||
| @@ -371,12 +344,11 @@ int LiteSession::Init(Context *context) { | |||
| return ret; | |||
| } | |||
| #if SUPPORT_GPU | |||
| if (device_type == DT_GPU) { | |||
| auto gpu_device_info = this->context_->device_list_[0].device_info_.gpu_device_info_; | |||
| if (this->context_->IsGpuEnabled()) { | |||
| auto gpu_device_info = this->context_->GetGpuInfo(); | |||
| auto opencl_runtime = ocl_runtime_wrap_.GetInstance(); | |||
| opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_); | |||
| if (opencl_runtime->Init() != RET_OK) { | |||
| device_type = DT_CPU; | |||
| MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode."; | |||
| } else { | |||
| MS_LOG(INFO) << "Init OpenCL runtime success."; | |||
| @@ -398,14 +370,13 @@ void LiteSession::BindThread(bool if_bind) { | |||
| MS_LOG(ERROR) << "Device list is empty."; | |||
| return; | |||
| } | |||
| auto &device_ctx = this->context_->device_list_[0]; | |||
| if (device_ctx.device_type_ != DT_CPU) { | |||
| MS_LOG(ERROR) << "Device is not CPU."; | |||
| if (this->context_->IsCpuEnabled()) { | |||
| return; | |||
| } | |||
| if (device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ != NO_BIND) { | |||
| auto cpu_device_info = this->context_->GetCpuInfo(); | |||
| if (cpu_device_info.cpu_bind_mode_ != NO_BIND) { | |||
| MS_ASSERT(this->context_->thread_pool_ != NULL); | |||
| BindThreads(this->context_->thread_pool_, if_bind, device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_); | |||
| BindThreads(this->context_->thread_pool_, if_bind, cpu_device_info.cpu_bind_mode_); | |||
| } | |||
| } | |||
| @@ -52,8 +52,7 @@ class LiteSession : public session::LiteSession { | |||
| mindspore::tensor::MSTensor *GetInputsByTensorName(const std::string &name) const override; | |||
| int RunGraph(const session::KernelCallBack &before = nullptr, | |||
| const session::KernelCallBack &after = nullptr) override; | |||
| int RunGraph(const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override; | |||
| std::vector<mindspore::tensor::MSTensor *> GetOutputsByNodeName(const std::string &node_name) const override; | |||
| @@ -163,11 +163,6 @@ int RunPriorBox(void *cdata, int task_id) { | |||
| } | |||
| int PriorBoxCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code[" << prepare_ret << "]"; | |||
| return prepare_ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, RunPriorBox, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]"; | |||
| @@ -140,12 +140,6 @@ int QuantDTypeCastRun(void *cdata, int task_id) { | |||
| } | |||
| int QuantDTypeCastCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 && | |||
| out_tensors_[0]->data_type() == TypeId::kNumberTypeFloat32) { | |||
| int8_ptr_ = reinterpret_cast<int8_t *>(in_tensors_[0]->data_c()); | |||
| @@ -91,24 +91,18 @@ int StridedSliceCPUKernel::HandleMultiInputs() { | |||
| } | |||
| int StridedSliceCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto input = in_tensors_.at(0); | |||
| auto output = out_tensors_.at(0); | |||
| MS_ASSERT(input); | |||
| MS_ASSERT(output); | |||
| if (in_tensors().size() == kMultiInputsSize) { | |||
| ret = HandleMultiInputs(); | |||
| auto ret = HandleMultiInputs(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| } | |||
| ret = DoStridedSlice(input->MutableData(), output->MutableData(), | |||
| reinterpret_cast<StridedSliceParameter *>(op_parameter_)); | |||
| auto ret = DoStridedSlice(input->MutableData(), output->MutableData(), | |||
| reinterpret_cast<StridedSliceParameter *>(op_parameter_)); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -103,9 +103,9 @@ int ActivationFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int ActivationFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| auto ret = MallocTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| MS_LOG(ERROR) << "MallocTmpBuffer failed."; | |||
| return ret; | |||
| } | |||
| @@ -185,11 +185,6 @@ static int ArithmeticsRunFp16(void *cdata, int task_id) { | |||
| } | |||
| int ArithmeticFP16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto output_tensor = out_tensors_.at(0); | |||
| is_input0_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32; | |||
| is_input1_fp32_ = in_tensors_.at(1)->data_type() == kNumberTypeFloat32; | |||
| @@ -203,7 +198,7 @@ int ArithmeticFP16CPUKernel::Run() { | |||
| FreeTmpBuffer(); | |||
| return RET_ERROR; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]"; | |||
| } | |||
| @@ -20,6 +20,8 @@ | |||
| #include "nnacl/fp16/arithmetic_self_fp16.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| namespace { | |||
| @@ -81,11 +83,6 @@ void ArithmeticSelfFp16CPUKernel::FreeInputAndOutput() { | |||
| } | |||
| int ArithmeticSelfFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto input_tensor = in_tensors_.at(0); | |||
| auto output_tensor = out_tensors_.at(0); | |||
| input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, context_); | |||
| @@ -95,7 +92,7 @@ int ArithmeticSelfFp16CPUKernel::Run() { | |||
| MS_LOG(ERROR) << "input or output is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | |||
| } | |||
| @@ -21,6 +21,8 @@ | |||
| #include "src/kernel_registry.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::kernel { | |||
| @@ -47,11 +49,6 @@ int BatchnormFp16CPUKernel::InitConstTensor() { | |||
| } | |||
| int BatchnormFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret; | |||
| return ret; | |||
| } | |||
| auto input_tensor = in_tensors_.at(0); | |||
| auto output_tensor = out_tensors_.at(0); | |||
| input_ = ConvertInputFp32toFp16(input_tensor, context_); | |||
| @@ -62,7 +59,7 @@ int BatchnormFp16CPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| @@ -76,7 +73,7 @@ int BatchnormFp16CPUKernel::Run() { | |||
| int BatchnormFp16CPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| BatchNormFp16(input_, mean_, variance_, param, task_id, output_); | |||
| return mindspore::lite::RET_OK; | |||
| return RET_OK; | |||
| } | |||
| void BatchnormFp16CPUKernel::FreeInputAndOutput() { | |||
| @@ -83,11 +83,6 @@ int CastFp16CPUKernel::DoCast(int thread_id) { | |||
| } | |||
| int CastFp16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| if (data_num_ == 0) { | |||
| return RET_OK; | |||
| } | |||
| @@ -91,12 +91,6 @@ void ConcatFp16CPUKernel::FreeTmpBuffer() { | |||
| } | |||
| int ConcatFp16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto ret = MallocTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| FreeTmpBuffer(); | |||
| @@ -218,13 +218,7 @@ static int Convolution1x1Fp16RunHw(void *cdata, int task_id) { | |||
| } | |||
| int Convolution1x1FP16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get executor tensor failed."; | |||
| return ret; | |||
| @@ -248,10 +242,14 @@ int Convolution1x1FP16CPUKernel::Run() { | |||
| } | |||
| if (multi_thread_by_hw_) { | |||
| ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_); | |||
| ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_); | |||
| } else { | |||
| RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | |||
| ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_); | |||
| ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_); | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ParallelLaunch failed."; | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -23,7 +23,8 @@ | |||
| #include "src/runtime/runtime_api.h" | |||
| namespace mindspore::kernel { | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() { | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| @@ -32,10 +32,10 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionBaseFP16CPUKernel() override; | |||
| int Init() override { return RET_OK; } | |||
| int ReSize() override { return RET_OK; } | |||
| int Run() override { return RET_OK; } | |||
| int RunImpl(int task_id) { return RET_OK; } | |||
| int Init() override { return mindspore::lite::RET_OK; } | |||
| int ReSize() override { return mindspore::lite::RET_OK; } | |||
| int Run() override { return mindspore::lite::RET_OK; } | |||
| int RunImpl(int task_id) { return mindspore::lite::RET_OK; } | |||
| virtual int GetExecuteTensor(); | |||
| virtual int GetExecuteFilter(); | |||
| virtual void IfCastOutput(); | |||
| @@ -110,12 +110,7 @@ static int ConvDwFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| return ret; | |||
| @@ -140,12 +140,7 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitBuffer(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| @@ -145,13 +145,7 @@ static int ConvolutionFp16Impl(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionFP16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| return ret; | |||
| @@ -211,12 +211,6 @@ static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionWinogradFP16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| @@ -61,11 +61,6 @@ static int CropFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int CropFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_); | |||
| if (input_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "input or output is nullptr"; | |||
| @@ -79,8 +74,11 @@ int CropFp16CPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, thread_count_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, thread_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ParallelLaunch failed: " << ret; | |||
| return ret; | |||
| } | |||
| if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) { | |||
| Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()), | |||
| out_tensors_.at(kOutputIndex)->ElementsNum()); | |||
| @@ -156,12 +156,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitBuffer(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| @@ -179,11 +179,6 @@ int DeConvolutionFp16CPUKernel::Init() { | |||
| } | |||
| int DeConvolutionFp16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| int error_code = InitRunBuf(); | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <arm_neon.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| #ifdef ENABLE_ARM64 | |||
| extern void Float32ToFloat16(const float *input, float16_t *output, int number); | |||
| extern void Float16ToFloat32(const float16_t *input, float *output, int number); | |||
| void Float32ToFloat16_fp16_handler(const void *input, void *output, int number) { | |||
| Float32ToFloat16(reinterpret_cast<const float *>(input), reinterpret_cast<float16_t *>(output), number); | |||
| } | |||
| void Float16ToFloat32_fp16_handler(const void *input, void *output, int number) { | |||
| Float16ToFloat32(reinterpret_cast<const float16_t *>(input), reinterpret_cast<float *>(output), number); | |||
| } | |||
| #endif | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -179,11 +179,6 @@ int FcFP16Run(void *cdata, int task_id) { | |||
| } | |||
| int FullconnectionFP16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto out_tensor = out_tensors_[0]; | |||
| if (out_tensor->data_type() == kNumberTypeFloat32) { | |||
| output_ptr_ = output_fp16_; | |||
| @@ -20,6 +20,8 @@ | |||
| #include "src/kernel_registry.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_FusedBatchNorm; | |||
| namespace mindspore::kernel { | |||
| @@ -70,11 +72,11 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { | |||
| context_->allocator->Free(mean_fp16); | |||
| context_->allocator->Free(variance_fp16); | |||
| context_->allocator->Free(output_fp16); | |||
| return mindspore::lite::RET_OK; | |||
| return RET_OK; | |||
| } | |||
| FusedBatchNormFp16(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id, | |||
| out_tensors_.at(0)->MutableData()); | |||
| return mindspore::lite::RET_OK; | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuFusedBatchnormFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -210,11 +210,6 @@ int MatmulFP16Run(void *cdata, int task_id) { | |||
| } | |||
| int MatmulFP16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto out_tensor = out_tensors_[0]; | |||
| float16_t *c_ptr = nullptr; | |||
| if (out_tensor->data_type() == kNumberTypeFloat32) { | |||
| @@ -33,11 +33,6 @@ int PadFp16CPUKernel::RunImpl(int task_id) { | |||
| } | |||
| int PadFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto input_tensor = in_tensors_.at(0); | |||
| auto output_tensor = out_tensors_.at(0); | |||
| is_input_fp32_ = input_tensor->data_type() == kNumberTypeFloat32; | |||
| @@ -58,7 +53,7 @@ int PadFp16CPUKernel::Run() { | |||
| output_[i] = pad_param_->constant_value_; | |||
| } | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| @@ -84,12 +84,6 @@ static int PoolingFp16Impl(void *cdata, int task_id) { | |||
| } | |||
| int PoolingFp16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_tensor = in_tensors_.at(kInputIndex); | |||
| auto in_data_type_ = input_tensor->data_type(); | |||
| MS_ASSERT(in_data_type_ == kNumberTypeFloat32 || in_data_type_ == kNumberTypeFloat16); | |||
| @@ -76,12 +76,6 @@ static int ReduceFp16Impl(void *cdata, int task_id) { | |||
| } | |||
| int ReduceFp16CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto ret = MallocTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| FreeTmpBuffer(); | |||
| @@ -31,11 +31,6 @@ using mindspore::schema::PrimitiveType_Reshape; | |||
| namespace mindspore::kernel { | |||
| int ReshapeFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto in_tensor = in_tensors_.at(kInputIndex); | |||
| auto out_tensor = out_tensors_.at(kOutputIndex); | |||
| auto input_ptr = in_tensor->MutableData(); | |||
| @@ -103,12 +103,7 @@ int ScaleFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int ScaleFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitScaleOffset(); | |||
| auto ret = InitScaleOffset(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed."; | |||
| return RET_ERROR; | |||
| @@ -20,6 +20,8 @@ | |||
| #include "nnacl/fp16/slice_fp16.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Slice; | |||
| namespace mindspore::kernel { | |||
| @@ -29,11 +31,6 @@ int SliceFp16CPUKernel::SliceParallelRun(int thread_id) { | |||
| } | |||
| int SliceFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| input_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); | |||
| output_fp16_ = MallocOutputFp16(out_tensors_.at(0), context_); | |||
| if (input_fp16_ == nullptr || output_fp16_ == nullptr) { | |||
| @@ -45,7 +42,7 @@ int SliceFp16CPUKernel::Run() { | |||
| DoSliceFp16NoParallel(input_fp16_, output_fp16_, param_); | |||
| return RET_OK; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "slice launch fail!ret: " << ret; | |||
| } | |||
| @@ -104,12 +104,7 @@ void SoftmaxFp16CPUKernel::FreeTmpBuffer() { | |||
| } | |||
| int SoftmaxFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return RET_ERROR; | |||
| } | |||
| ret = MallocTmpBuffer(); | |||
| auto ret = MallocTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| FreeTmpBuffer(); | |||
| MS_LOG(ERROR) << "MallocTmpBuffer failed"; | |||
| @@ -76,11 +76,6 @@ static int SplitFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int SplitFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); | |||
| if (input_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "input or output is nullptr"; | |||
| @@ -94,7 +89,7 @@ int SplitFp16CPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, SplitFp16Run, this, thread_n_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SplitFp16Run, this, thread_n_num_); | |||
| for (int i = 0; i < param->num_split_; i++) { | |||
| if (out_tensors_.at(i)->data_type() == kNumberTypeFloat32) { | |||
| Float16ToFloat32(output_ptr_[i], reinterpret_cast<float *>(out_tensors_.at(i)->MutableData()), | |||
| @@ -76,11 +76,6 @@ void StackFp16CPUKernel::FreeBuffer() { | |||
| } | |||
| int StackFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| size_t inputs_num = in_tensors_.size(); | |||
| auto input0 = in_tensors_[0]; | |||
| if (inputs_num == 1) { | |||
| @@ -88,7 +83,7 @@ int StackFp16CPUKernel::Run() { | |||
| return RET_OK; | |||
| } | |||
| InitMallocFlags(); | |||
| ret = MallocAssignBuffer(); | |||
| auto ret = MallocAssignBuffer(); | |||
| if (ret != RET_OK) { | |||
| FreeBuffer(); | |||
| return ret; | |||
| @@ -128,11 +128,6 @@ static int TransposeFp16Run(void *cdata, int task_id) { | |||
| } | |||
| int TransposeFp16CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| MS_ASSERT(in_tensors_.size() == TransposeInputNum); | |||
| MS_ASSERT(out_tensors_.size() == TransposeOutputNum); | |||
| auto &in_tensor = in_tensors_.front(); | |||
| @@ -143,7 +138,7 @@ int TransposeFp16CPUKernel::Run() { | |||
| } | |||
| // malloc when Run | |||
| ret = MallocFp16Buffer(); | |||
| auto ret = MallocFp16Buffer(); | |||
| if (ret != RET_OK) { | |||
| FreeFp16Buffer(); | |||
| return ret; | |||
| @@ -83,11 +83,6 @@ int ActivationRun(void *cdata, int task_id) { | |||
| } | |||
| int ActivationCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | |||
| @@ -55,11 +55,6 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) { | |||
| } | |||
| int AddNCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| elements_num_ = out_tensors_[0]->ElementsNum(); | |||
| auto input0_data = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| auto input1_data = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); | |||
| @@ -94,7 +89,7 @@ int AddNCPUKernel::Run() { | |||
| in1_addr_ = input0_data; | |||
| in2_addr_ = input1_data; | |||
| out_addr_ = output_data; | |||
| ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "addn launch fail!ret: " << ret; | |||
| return RET_ERROR; | |||
| @@ -45,12 +45,7 @@ int ArgMinMaxCPUKernel::Init() { | |||
| int ArgMinMaxCPUKernel::ReSize() { return ArgMinMaxBaseCPUKernel::ReSize(); } | |||
| int ArgMinMaxCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = ArgMinMaxBaseCPUKernel::Run(); | |||
| auto ret = ArgMinMaxBaseCPUKernel::Run(); | |||
| return ret; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -189,39 +189,38 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) { | |||
| int out_count = MSMIN(stride, outside_ - stride * task_id); | |||
| int out_thread_stride = stride * task_id; | |||
| if (data_type_ == kDataTypeFloat) { | |||
| error_code = | |||
| BroadcastRun(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), | |||
| reinterpret_cast<float *>(in_tensors_[1]->MutableData()), | |||
| reinterpret_cast<float *>(out_tensors_[0]->MutableData()), 0, out_count, out_thread_stride); | |||
| error_code = BroadcastRun(reinterpret_cast<float *>(in_tensors_[0]->data_c()), | |||
| reinterpret_cast<float *>(in_tensors_[1]->data_c()), | |||
| reinterpret_cast<float *>(out_tensors_[0]->data_c()), 0, out_count, out_thread_stride); | |||
| } else { | |||
| error_code = BroadcastRun( | |||
| reinterpret_cast<int *>(in_tensors_[0]->MutableData()), reinterpret_cast<int *>(in_tensors_[1]->MutableData()), | |||
| reinterpret_cast<int *>(out_tensors_[0]->MutableData()), 0, out_count, out_thread_stride); | |||
| error_code = BroadcastRun(reinterpret_cast<int *>(in_tensors_[0]->data_c()), | |||
| reinterpret_cast<int *>(in_tensors_[1]->data_c()), | |||
| reinterpret_cast<int *>(out_tensors_[0]->data_c()), 0, out_count, out_thread_stride); | |||
| } | |||
| } else if (arithmetic_opt_run_ != nullptr) { // no broadcast, one of input is scalar | |||
| if (arithmeticParameter_->in_elements_num0_ == 1) { | |||
| if (data_type_ == kDataTypeFloat) { | |||
| error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), | |||
| reinterpret_cast<float *>(in_tensors_[1]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<float *>(out_tensors_[0]->MutableData()) + stride * task_id, | |||
| count, arithmeticParameter_); | |||
| error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->data_c()), | |||
| reinterpret_cast<float *>(in_tensors_[1]->data_c()) + stride * task_id, | |||
| reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count, | |||
| arithmeticParameter_); | |||
| } else { | |||
| error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->MutableData()), | |||
| reinterpret_cast<int *>(in_tensors_[1]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<int *>(out_tensors_[0]->MutableData()) + stride * task_id, | |||
| error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->data_c()), | |||
| reinterpret_cast<int *>(in_tensors_[1]->data_c()) + stride * task_id, | |||
| reinterpret_cast<int *>(out_tensors_[0]->data_c()) + stride * task_id, | |||
| count, arithmeticParameter_); | |||
| } | |||
| } else if (arithmeticParameter_->in_elements_num1_ == 1) { | |||
| if (data_type_ == kDataTypeFloat) { | |||
| error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<float *>(in_tensors_[1]->MutableData()), | |||
| reinterpret_cast<float *>(out_tensors_[0]->MutableData()) + stride * task_id, | |||
| count, arithmeticParameter_); | |||
| error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->data_c()) + stride * task_id, | |||
| reinterpret_cast<float *>(in_tensors_[1]->data_c()), | |||
| reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count, | |||
| arithmeticParameter_); | |||
| } else { | |||
| error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<int *>(in_tensors_[1]->MutableData()), | |||
| reinterpret_cast<int *>(out_tensors_[0]->MutableData()) + stride * task_id, | |||
| error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->data_c()) + stride * task_id, | |||
| reinterpret_cast<int *>(in_tensors_[1]->data_c()), | |||
| reinterpret_cast<int *>(out_tensors_[0]->data_c()) + stride * task_id, | |||
| count, arithmeticParameter_); | |||
| } | |||
| } else { | |||
| @@ -230,14 +229,13 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) { | |||
| } | |||
| } else { // no broadcast, neither is scalar, two same shape | |||
| if (data_type_ == kDataTypeFloat) { | |||
| error_code = arithmetic_run_(reinterpret_cast<float *>(in_tensors_[0]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<float *>(in_tensors_[1]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<float *>(out_tensors_[0]->MutableData()) + stride * task_id, count); | |||
| error_code = arithmetic_run_(reinterpret_cast<float *>(in_tensors_[0]->data_c()) + stride * task_id, | |||
| reinterpret_cast<float *>(in_tensors_[1]->data_c()) + stride * task_id, | |||
| reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count); | |||
| } else { | |||
| error_code = | |||
| arithmetic_run_int_(reinterpret_cast<int *>(in_tensors_[0]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<int *>(in_tensors_[1]->MutableData()) + stride * task_id, | |||
| reinterpret_cast<int *>(out_tensors_[0]->MutableData()) + stride * task_id, count); | |||
| error_code = arithmetic_run_int_(reinterpret_cast<int *>(in_tensors_[0]->data_c()) + stride * task_id, | |||
| reinterpret_cast<int *>(in_tensors_[1]->data_c()) + stride * task_id, | |||
| reinterpret_cast<int *>(out_tensors_[0]->data_c()) + stride * task_id, count); | |||
| } | |||
| } | |||
| if (error_code != RET_OK) { | |||
| @@ -257,11 +255,6 @@ int ArithmeticsRun(void *cdata, int task_id) { | |||
| } | |||
| int ArithmeticCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| if (arithmeticParameter_->broadcasting_) { | |||
| outside_ = 1; | |||
| for (auto i = arithmeticParameter_->ndim_ - 1; i >= 0; --i) { | |||
| @@ -18,6 +18,8 @@ | |||
| #include "nnacl/fp32/arithmetic_self.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| namespace { | |||
| @@ -88,12 +90,7 @@ int ArithmeticSelfRun(void *cdata, int task_id) { | |||
| } | |||
| int ArithmeticSelfCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | |||
| } | |||
| @@ -38,11 +38,6 @@ int BatchToSpaceCPUKernel::Init() { | |||
| int BatchToSpaceCPUKernel::ReSize() { return BatchToSpaceBaseCPUKernel::ReSize(); } | |||
| int BatchToSpaceCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = in_tensors_[0]; | |||
| auto output = out_tensors_[0]; | |||
| const float *input_data = reinterpret_cast<const float *>(input->MutableData()); | |||
| @@ -18,6 +18,8 @@ | |||
| #include "src/kernel_registry.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::kernel { | |||
| @@ -70,12 +72,7 @@ int BatchnormCPUKernel::InitConstTensor() { | |||
| } | |||
| int BatchnormCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret; | |||
| return ret; | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| @@ -85,7 +82,7 @@ int BatchnormCPUKernel::Run() { | |||
| int BatchnormCPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| BatchNormFp32(in_tensors_.at(0)->MutableData(), mean_, variance_, param, task_id, out_tensors_.at(0)->MutableData()); | |||
| return mindspore::lite::RET_OK; | |||
| return RET_OK; | |||
| } | |||
| int BatchNormRun(void *cdata, int task_id) { | |||
| @@ -42,11 +42,6 @@ int BiasCPUKernel::ReSize() { | |||
| } | |||
| int BiasCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto in = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| auto out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| @@ -49,11 +49,6 @@ int BroadcastToCPUKernel::Init() { | |||
| } | |||
| int BroadcastToCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| @@ -66,15 +66,16 @@ int CastCPUKernel::DoCast(int thread_id) { | |||
| auto offset = thread_id * stride_; | |||
| auto output = out_tensors_.at(0); | |||
| auto output_data = output->MutableData(); | |||
| auto output_data = output->data_c(); | |||
| MS_ASSERT(output_data != nullptr); | |||
| auto input_data_type = input->data_type(); | |||
| auto output_data_type = output->data_type(); | |||
| if (output_data_type != kNumberTypeFloat32) { | |||
| if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) { | |||
| Float32ToInt32(reinterpret_cast<float *>(input->MutableData()) + offset, | |||
| Float32ToInt32(reinterpret_cast<float *>(input->data_c()) + offset, | |||
| reinterpret_cast<int32_t *>(output_data) + offset, data_num); | |||
| } else if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeFloat16) { | |||
| Float32ToFp16(reinterpret_cast<float *>(input->MutableData()) + offset, | |||
| Float32ToFp16(reinterpret_cast<float *>(input->data_c()) + offset, | |||
| reinterpret_cast<uint16_t *>(output_data) + offset, data_num); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported datatype from " << input_data_type << " to " << output_data_type; | |||
| @@ -106,11 +107,6 @@ int CastCPUKernel::DoCast(int thread_id) { | |||
| } | |||
| int CastCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| if (data_num_ == 0) { | |||
| return RET_OK; | |||
| } | |||
| @@ -75,11 +75,6 @@ int ConcatsRun(void *cdata, int task_id) { | |||
| } | |||
| int ConcatCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ConcatsRun, this, thread_count_); | |||
| return error_code; | |||
| } | |||
| @@ -52,11 +52,6 @@ int ConstantOfShapeRun(void *cdata, int task_id) { | |||
| } | |||
| int ConstantOfShapeCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| param_->element_sz_ = out_tensors_.front()->ElementsNum(); | |||
| int thread_num = MSMIN(param_->op_parameter_.thread_num_, param_->element_sz_); | |||
| param_->unit_ = UP_DIV(param_->element_sz_, thread_num); | |||
| @@ -141,12 +141,6 @@ int ConvolutionImpl(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| @@ -194,11 +194,6 @@ int Convolution1x1RunHw(void *cdata, int task_id) { | |||
| } | |||
| int Convolution1x1CPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto src_in = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| auto src_out = reinterpret_cast<float *>(out_tensors_[0]->MutableData()); | |||
| @@ -101,19 +101,13 @@ int ConvDwRun(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| auto input_tensor = in_tensors_.at(kInputIndex); | |||
| input_ptr_ = reinterpret_cast<float *>(input_tensor->MutableData()); | |||
| auto output_tensor = out_tensors_.at(kOutputIndex); | |||
| output_ptr_ = reinterpret_cast<float *>(output_tensor->MutableData()); | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -134,12 +134,7 @@ int ConvDwSWRun(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionDepthwiseSWCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return ret; | |||
| } | |||
| ret = InitBuffer(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| @@ -219,12 +219,6 @@ int ConvolutionWinogradImpl(void *cdata, int task_id) { | |||
| } | |||
| int ConvolutionWinogradCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| @@ -53,11 +53,6 @@ int CropCPUKernel::CropParallelRun(int thread_id) { | |||
| } | |||
| int CropCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = in_tensors_[0]; | |||
| auto output = out_tensors_[0]; | |||
| auto param = reinterpret_cast<CropParameter *>(op_parameter_); | |||
| @@ -196,11 +196,6 @@ int DeConvolutionCPUKernel::InitRunBuf() { | |||
| } | |||
| int DeConvolutionCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| float *src_in = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| float *src_out = reinterpret_cast<float *>(out_tensors_[0]->MutableData()); | |||
| @@ -151,13 +151,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitBuffer(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; | |||
| return ret; | |||
| @@ -47,11 +47,6 @@ int DepthToSpaceCPUKernel::Init() { | |||
| int DepthToSpaceCPUKernel::ReSize() { return DepthToSpaceBaseCPUKernel::ReSize(); } | |||
| int DepthToSpaceCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = in_tensors_[0]; | |||
| auto output = out_tensors_[0]; | |||
| const float *input_data = reinterpret_cast<const float *>(input->MutableData()); | |||
| @@ -66,11 +66,6 @@ DetectionPostProcessCPUKernel::~DetectionPostProcessCPUKernel() { | |||
| int DetectionPostProcessCPUKernel::ReSize() { return RET_OK; } | |||
| int DetectionPostProcessCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_boxes = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| auto input_scores = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| @@ -57,11 +57,6 @@ int EluRun(void *cdata, int task_id) { | |||
| } | |||
| int EluCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | |||
| output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | |||
| @@ -72,12 +72,6 @@ int EmbeddingLookupRun(void *cdata, int task_id) { | |||
| } | |||
| int EmbeddingLookupCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| MS_ASSERT(context_->allocator != nullptr); | |||
| input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc( | |||
| sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); | |||
| @@ -69,11 +69,6 @@ int ExpRun(void *cdata, int task_id) { | |||
| } | |||
| int ExpCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| input_addr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | |||
| output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | |||
| exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum(); | |||
| @@ -77,11 +77,6 @@ int ExpandDimsRun(void *cdata, int task_id) { | |||
| } | |||
| int ExpandDimsCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| in_ptr_ = in_tensors_.at(0)->MutableData(); | |||
| out_ptr_ = out_tensors_.at(0)->MutableData(); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ExpandDimsRun, this, thread_sz_count_); | |||
| @@ -67,11 +67,6 @@ int FillRun(void *cdata, int task_id) { | |||
| } | |||
| int FillCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto fillData = in_tensors_.at(in_tensors_.size() - 1); | |||
| auto output = out_tensors_.front(); | |||
| auto fill_data = reinterpret_cast<float *>(fillData->MutableData()); | |||
| @@ -44,11 +44,6 @@ int FlattenCPUKernel::ReSize() { | |||
| } | |||
| int FlattenCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| auto output = reinterpret_cast<float *>(out_tensors_[0]->MutableData()); | |||
| Flatten(input, output, flatten_param_); | |||
| @@ -162,11 +162,6 @@ int FullconnectionCPUKernel::DoMatmul(int task_id) { | |||
| } | |||
| int FullconnectionCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto a_ptr = reinterpret_cast<float *>(in_tensors_.at(0)->data_c()); | |||
| auto b_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->data_c()); | |||
| c_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | |||
| @@ -18,6 +18,8 @@ | |||
| #include "src/kernel_registry.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_FusedBatchNorm; | |||
| namespace mindspore::kernel { | |||
| @@ -84,11 +86,6 @@ int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| } | |||
| int FusedBatchnormCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret; | |||
| return ret; | |||
| } | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| if (is_train() && in_tensors_.size() >= 5) { | |||
| float *in = static_cast<float *>(in_tensors_[0]->MutableData()); | |||
| @@ -108,7 +105,7 @@ int FusedBatchnormCPUKernel::Run() { | |||
| memcpy(offset_, bias, in_tensors_[2]->Size()); | |||
| trained_ = true; // trained at least once | |||
| } | |||
| ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| @@ -137,7 +134,7 @@ int FusedBatchnormCPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| FusedBatchNormFp32(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id, | |||
| out_tensors_.at(0)->MutableData()); | |||
| return mindspore::lite::RET_OK; | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -91,12 +91,6 @@ int GatherRun(void *cdata, int task_id) { | |||
| } | |||
| int GatherCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto indices_tensor = in_tensors_.at(1); | |||
| int indices_num = indices_tensor->ElementsNum(); | |||
| bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32; | |||
| @@ -116,11 +116,6 @@ int GatherNdRun(void *cdata, int task_id) { | |||
| } | |||
| int GatherNdCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | |||
| out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdRun, this, thread_sz_count_); | |||
| @@ -141,17 +141,12 @@ int L2NormTrailingAxisRun(void *cdata, int task_id) { | |||
| } | |||
| int L2NormCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto input_shape = in_tensors().at(kInputIndex)->shape(); | |||
| input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData()); | |||
| output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | |||
| if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) { | |||
| // all axis | |||
| ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -167,7 +162,7 @@ int L2NormCPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -74,11 +74,6 @@ int LocalResponseNormRun(void *cdata, int task_id) { | |||
| } | |||
| int LocalResponseNormCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, LocalResponseNormRun, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]"; | |||
| @@ -147,11 +147,6 @@ int LstmCPUKernel::ReSize() { | |||
| } | |||
| int LstmCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = in_tensors_.at(kInputIndex); | |||
| MS_ASSERT(input != nullptr); | |||
| auto hidden_state = in_tensors_.at(4); | |||
| @@ -281,11 +281,6 @@ int MatmulFloatRun(void *cdata, int task_id) { | |||
| } | |||
| int MatmulCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto a_src = reinterpret_cast<float *>(in_tensors_[0]->data_c()); | |||
| auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c()); | |||
| auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c()); | |||
| @@ -28,11 +28,6 @@ int Nchw2NhwcCPUKernel::Init() { return RET_OK; } | |||
| int Nchw2NhwcCPUKernel::ReSize() { return RET_OK; } | |||
| int Nchw2NhwcCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = in_tensors_[0]; | |||
| auto output = out_tensors_[0]; | |||
| @@ -28,11 +28,6 @@ int Nhwc2NchwCPUKernel::Init() { return RET_OK; } | |||
| int Nhwc2NchwCPUKernel::ReSize() { return RET_OK; } | |||
| int Nhwc2NchwCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input = in_tensors_[0]; | |||
| auto output = out_tensors_[0]; | |||
| @@ -161,11 +161,6 @@ int OneHotCPUKernel::GetParams() { | |||
| } | |||
| int OneHotCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, RunOneHot, this, context_->thread_num_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]"; | |||
| @@ -227,12 +227,6 @@ int PadCPUKernel::HandleMirrorPad() { | |||
| } | |||
| int PadCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| int error_code; | |||
| if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) { | |||
| auto output = out_tensors_.at(0); | |||
| @@ -84,11 +84,6 @@ int PoolingImpl(void *cdata, int task_id) { | |||
| } | |||
| int PoolingCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingImpl, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; | |||
| @@ -41,11 +41,6 @@ int PowerImpl(void *cdata, int task_id) { | |||
| } | |||
| int PowerCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImpl, this, thread_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "PowerCPUKernel error: " << ret; | |||
| @@ -107,11 +107,6 @@ int PReluCPUKernel::ProcessShareChannelInput() { | |||
| } | |||
| int PReluCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| MS_ASSERT(in_shape.size() >= 2); | |||
| auto input_tensor = in_tensors_[0]; | |||
| ori_input_ = reinterpret_cast<float *>(input_tensor->MutableData()); | |||
| @@ -32,11 +32,6 @@ int RangeCPUKernel::Init() { return RET_OK; } | |||
| int RangeCPUKernel::ReSize() { return RET_OK; } | |||
| int RangeCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| size_t start = (reinterpret_cast<RangeParameter *>(op_parameter_))->start_; | |||
| size_t limit = (reinterpret_cast<RangeParameter *>(op_parameter_))->limit_; | |||
| size_t delta = (reinterpret_cast<RangeParameter *>(op_parameter_))->delta_; | |||
| @@ -32,11 +32,6 @@ int RankCPUKernel::Init() { return RET_OK; } | |||
| int RankCPUKernel::ReSize() { return RET_OK; } | |||
| int RankCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| auto in_shape = in_tensors_[0]->shape(); | |||
| auto rank = in_shape.size(); | |||
| @@ -113,11 +113,6 @@ int ReduceImpl(void *cdata, int task_id) { | |||
| } | |||
| int ReduceCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| if (in_tensors().at(0)->data_type() == kNumberTypeFloat32) { | |||
| data_type_ = kDataTypeFloat; | |||
| } else { | |||
| @@ -129,8 +124,8 @@ int ReduceCPUKernel::Run() { | |||
| return ret; | |||
| } | |||
| src_data_ = in_tensors_.at(0)->MutableData(); | |||
| PreProcess(); | |||
| src_data_ = in_tensors_.at(0)->data_c(); | |||
| HandleASumAndSumSquare(); | |||
| for (size_t i = 0; i < static_cast<size_t>(num_axes_); ++i) { | |||
| if (i != static_cast<size_t>(num_axes_ - 1)) { | |||
| dst_data_ = data_buffers_[i]; | |||
| @@ -159,12 +154,12 @@ int ReduceCPUKernel::Run() { | |||
| return RET_OK; | |||
| } | |||
| void ReduceCPUKernel::PreProcess() { | |||
| void ReduceCPUKernel::HandleASumAndSumSquare() { | |||
| if (data_type_ == kDataTypeInt) { | |||
| return; | |||
| } | |||
| int num = in_tensors_.at(0)->ElementsNum(); | |||
| float *data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| float *data = reinterpret_cast<float *>(in_tensors_.at(0)->data_c()); | |||
| if (data == nullptr) { | |||
| return; | |||
| } | |||
| @@ -65,7 +65,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel { | |||
| int MallocTmpBuffer(); | |||
| void FreeTmpBuffer(); | |||
| int CalculateCoeffOutput(); | |||
| void PreProcess(); | |||
| void HandleASumAndSumSquare(); | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -36,11 +36,6 @@ int ReshapeCPUKernel::Init() { | |||
| int ReshapeCPUKernel::ReSize() { return RET_OK; } | |||
| int ReshapeCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto input_ptr = in_tensors_.at(kInputIndex)->MutableData(); | |||
| auto output_ptr = out_tensors_.at(kOutputIndex)->MutableData(); | |||
| size_t data_size = in_tensors_.at(kInputIndex)->Size(); | |||
| @@ -204,11 +204,6 @@ int ResizeCPUKernel::RunImpl(int task_id) { | |||
| } | |||
| int ResizeCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeImpl, this, context_->thread_num_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; | |||
| @@ -125,14 +125,9 @@ int ReverseCPUKernel::DoReverse(int task_id) { | |||
| } | |||
| int ReverseCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare failed."; | |||
| return RET_ERROR; | |||
| } | |||
| in_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | |||
| out_ptr_ = reinterpret_cast<float *>(out_tensors_[0]->MutableData()); | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]"; | |||
| return ret; | |||
| @@ -87,11 +87,6 @@ int ReverseSequenceCPUKernel::ReSize() { | |||
| } | |||
| int ReverseSequenceCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| float *input0 = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| void *input1 = in_tensors_.at(1)->MutableData(); | |||
| float *output = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| @@ -93,15 +93,10 @@ int ROIPoolingRun(void *cdata, int task_id) { | |||
| } | |||
| int ROIPoolingCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! ret: " << ret; | |||
| return ret; | |||
| } | |||
| in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | |||
| out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | |||
| roi_ptr_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]"; | |||
| return ret; | |||
| @@ -174,11 +174,6 @@ int ScaleRun(void *cdata, int task_id) { | |||
| } | |||
| int ScaleCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| auto in_tensor = in_tensors_.front(); | |||
| input_ptr_ = reinterpret_cast<float *>(in_tensor->data_c()); | |||
| if (!scale_param_->const_scale_) { | |||
| @@ -193,7 +188,7 @@ int ScaleCPUKernel::Run() { | |||
| auto out_tensor = out_tensors_.front(); | |||
| output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData()); | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||