From: @gongdaguo Reviewed-by: @jpc_chenjianping, @zhanghaibo5 Signed-off-by: @jpc_chenjianping pull/15176/MERGE
| @@ -841,7 +841,7 @@ table Rsqrt { | |||||
| } | } | ||||
| table QuantDTypeCast { | table QuantDTypeCast { | ||||
| src_t: long; // deprecated | |||||
| src_t: long; | |||||
| dst_t: long; | dst_t: long; | ||||
| } | } | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "src/tensor.h" | #include "src/tensor.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "include/context.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| enum KERNEL_ARCH { | enum KERNEL_ARCH { | ||||
| @@ -64,7 +65,7 @@ class LiteKernel { | |||||
| public: | public: | ||||
| LiteKernel() = default; | LiteKernel() = default; | ||||
| LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors, | LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors, | ||||
| const lite::InnerContext *ctx) | |||||
| const lite::Context *ctx) | |||||
| : op_parameter_(parameter), | : op_parameter_(parameter), | ||||
| in_tensors_(std::move(in_tensors)), | in_tensors_(std::move(in_tensors)), | ||||
| out_tensors_(std::move(out_tensors)), | out_tensors_(std::move(out_tensors)), | ||||
| @@ -175,7 +176,7 @@ class LiteKernel { | |||||
| SubGraphType subgraph_type() const { return this->subgraph_type_; } | SubGraphType subgraph_type() const { return this->subgraph_type_; } | ||||
| const lite::InnerContext *context() const { return this->context_; } | |||||
| const lite::Context *context() const { return this->context_; } | |||||
| virtual std::string ToString() const; | virtual std::string ToString() const; | ||||
| @@ -202,7 +203,7 @@ class LiteKernel { | |||||
| // tensor will free in ~lite_session() | // tensor will free in ~lite_session() | ||||
| std::vector<lite::Tensor *> in_tensors_; | std::vector<lite::Tensor *> in_tensors_; | ||||
| std::vector<lite::Tensor *> out_tensors_; | std::vector<lite::Tensor *> out_tensors_; | ||||
| const lite::InnerContext *context_ = nullptr; | |||||
| const lite::Context *context_ = nullptr; | |||||
| std::vector<LiteKernel *> in_kernels_; | std::vector<LiteKernel *> in_kernels_; | ||||
| std::vector<LiteKernel *> out_kernels_; | std::vector<LiteKernel *> out_kernels_; | ||||
| bool train_mode_ = false; | bool train_mode_ = false; | ||||
| @@ -217,13 +218,13 @@ class LiteKernel { | |||||
| typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs, | typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *parameter, | ||||
| const lite::InnerContext *ctx, const KernelKey &desc); | |||||
| const lite::Context *ctx, const KernelKey &desc); | |||||
| template <class T> | template <class T> | ||||
| kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *parameter, | ||||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, ctx); | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr."; | MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr."; | ||||
| free(parameter); | free(parameter); | ||||
| @@ -206,7 +206,8 @@ int SubGraphNpuKernel::Init() { | |||||
| MS_ASSERT(npu_manager_ != nullptr); | MS_ASSERT(npu_manager_ != nullptr); | ||||
| npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_); | |||||
| npu_manager_->AddModel(model_buffer_data, GetOMModelName(), | |||||
| static_cast<const lite::InnerContext *>(context_)->GetNpuInfo().frequency_); | |||||
| executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_); | executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_); | ||||
| @@ -73,7 +73,8 @@ int ConstantOfShapeCPUKernel::Run() { | |||||
| int thread_count = MSMIN(op_parameter_->thread_num_, param_->element_size_); | int thread_count = MSMIN(op_parameter_->thread_num_, param_->element_size_); | ||||
| thread_stride_ = UP_DIV(param_->element_size_, thread_count); | thread_stride_ = UP_DIV(param_->element_size_, thread_count); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ConstantOfShapeRun, this, thread_count); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConstantOfShapeRun, | |||||
| this, thread_count); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -144,17 +144,7 @@ void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() { | |||||
| } | } | ||||
| } | } | ||||
| int DetectionPostProcessBaseCPUKernel::Run() { | |||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| int status = GetInputData(); | |||||
| if (status != RET_OK) { | |||||
| return status; | |||||
| } | |||||
| auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | |||||
| auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data_c()); | |||||
| auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data_c()); | |||||
| auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data_c()); | |||||
| int DetectionPostProcessBaseCPUKernel::ParamInit() { | |||||
| num_boxes_ = in_tensors_.at(0)->shape().at(1); | num_boxes_ = in_tensors_.at(0)->shape().at(1); | ||||
| num_classes_with_bg_ = in_tensors_.at(1)->shape().at(2); | num_classes_with_bg_ = in_tensors_.at(1)->shape().at(2); | ||||
| params_->decoded_boxes_ = context_->allocator->Malloc(num_boxes_ * 4 * sizeof(float)); | params_->decoded_boxes_ = context_->allocator->Malloc(num_boxes_ * 4 * sizeof(float)); | ||||
| @@ -221,6 +211,24 @@ int DetectionPostProcessBaseCPUKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | |||||
| } | |||||
| int DetectionPostProcessBaseCPUKernel::Run() { | |||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| int status = GetInputData(); | |||||
| if (status != RET_OK) { | |||||
| return status; | |||||
| } | |||||
| auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | |||||
| auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data_c()); | |||||
| auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data_c()); | |||||
| auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data_c()); | |||||
| if (ParamInit() != RET_OK) { | |||||
| MS_LOG(ERROR) << "ParamInit error"; | |||||
| return status; | |||||
| } | |||||
| status = DecodeBoxes(num_boxes_, input_boxes_, params_->anchors_, params_); | status = DecodeBoxes(num_boxes_, input_boxes_, params_->anchors_, params_); | ||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| @@ -238,7 +246,8 @@ int DetectionPostProcessBaseCPUKernel::Run() { | |||||
| return status; | return status; | ||||
| } | } | ||||
| } else { | } else { | ||||
| status = ParallelLaunch(this->context_->thread_pool_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_); | |||||
| status = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_); | |||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]"; | MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]"; | ||||
| FreeAllocatedBuffer(); | FreeAllocatedBuffer(); | ||||
| @@ -47,6 +47,7 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel { | |||||
| protected: | protected: | ||||
| virtual int GetInputData() = 0; | virtual int GetInputData() = 0; | ||||
| int ParamInit(); | |||||
| private: | private: | ||||
| void FreeAllocatedBuffer(); | void FreeAllocatedBuffer(); | ||||
| @@ -166,7 +166,8 @@ int RunPriorBox(void *cdata, int task_id) { | |||||
| } | } | ||||
| int PriorBoxCPUKernel::Run() { | int PriorBoxCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, RunPriorBox, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, RunPriorBox, | |||||
| this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -172,7 +172,8 @@ int QuantDTypeCastCPUKernel::Run() { | |||||
| uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c()); | uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c()); | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastRun, this, thread_n_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, QuantDTypeCastRun, | |||||
| this, thread_n_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | ||||
| if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 && | if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 && | ||||
| @@ -66,7 +66,8 @@ int ReshapeRun(void *cdata, int task_id) { | |||||
| int ReshapeBaseCPUKernel::Run() { | int ReshapeBaseCPUKernel::Run() { | ||||
| input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c()); | input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c()); | ||||
| output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c()); | output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ReshapeRun, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReshapeRun, this, | |||||
| context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -81,7 +81,8 @@ int SliceCPUKernel::Run() { | |||||
| lite::DataTypeSize(in_tensors_.at(0)->data_type())); | lite::DataTypeSize(in_tensors_.at(0)->data_type())); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceLaunch, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "slice launch fail!ret: " << ret; | MS_LOG(ERROR) << "slice launch fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -120,7 +120,8 @@ int SplitBaseCPUKernel::Run() { | |||||
| output_ptr_.at(i) = output_tensor->data_c(); | output_ptr_.at(i) = output_tensor->data_c(); | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SplitRun, this, | |||||
| thread_n_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "split error error_code[" << ret << "]"; | MS_LOG(ERROR) << "split error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -100,7 +100,8 @@ int StackBaseCPUKernel::Run() { | |||||
| } | } | ||||
| // run stack | // run stack | ||||
| num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_); | num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, StackRun, this, num_threads_); | |||||
| auto ret = | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StackRun, this, num_threads_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -157,7 +157,8 @@ int StridedSliceCPUKernel::FastRun() { | |||||
| } | } | ||||
| input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c()); | input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c()); | ||||
| output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c()); | output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, StrideRun, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StrideRun, this, | |||||
| context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -127,7 +127,8 @@ int TileCPUKernel::SimpleTileImpl(int task_id) { | |||||
| } | } | ||||
| int TileCPUKernel::RunSimpleTile() { | int TileCPUKernel::RunSimpleTile() { | ||||
| auto ret = ParallelLaunch(context_->thread_pool_, SimpleTile, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SimpleTile, this, | |||||
| context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]"; | MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -100,7 +100,8 @@ int ActivationFp16CPUKernel::Run() { | |||||
| fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | ||||
| fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationFp16Run, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ActivationFp16Run, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -160,15 +160,16 @@ int ArithmeticCompareFP16CPUKernel::Run() { | |||||
| is_input0_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32; | is_input0_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32; | ||||
| is_input1_fp32_ = in_tensors_.at(1)->data_type() == kNumberTypeFloat32; | is_input1_fp32_ = in_tensors_.at(1)->data_type() == kNumberTypeFloat32; | ||||
| input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); | |||||
| input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_); | |||||
| input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| output_fp16_ = reinterpret_cast<uint8_t *>(output_tensor->MutableData()); | output_fp16_ = reinterpret_cast<uint8_t *>(output_tensor->MutableData()); | ||||
| if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) { | if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Memory allocation failed"; | MS_LOG(ERROR) << "Memory allocation failed"; | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRunFp16, | |||||
| this, context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]"; | MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -127,13 +127,13 @@ void ArithmeticFP16CPUKernel::InitRunFunction(int primitive_type) { | |||||
| int ArithmeticFP16CPUKernel::ConstTensorBroadCast() { | int ArithmeticFP16CPUKernel::ConstTensorBroadCast() { | ||||
| int ret; | int ret; | ||||
| if (in_tensors_[0]->data_c() != nullptr) { | if (in_tensors_[0]->data_c() != nullptr) { | ||||
| ret = ConvertFp32TensorToFp16(in_tensors_[0], context_); | |||||
| ret = ConvertFp32TensorToFp16(in_tensors_[0], static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| if (in_tensors_[1]->data_c() != nullptr) { | if (in_tensors_[1]->data_c() != nullptr) { | ||||
| ret = ConvertFp32TensorToFp16(in_tensors_[1], context_); | |||||
| ret = ConvertFp32TensorToFp16(in_tensors_[1], static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -167,18 +167,19 @@ int ArithmeticFP16CPUKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (!input0_broadcast_) { | if (!input0_broadcast_) { | ||||
| input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); | |||||
| input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| } | } | ||||
| if (!input1_broadcast_) { | if (!input1_broadcast_) { | ||||
| input1_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_); | |||||
| input1_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| } | } | ||||
| auto output_tensor = out_tensors_.at(0); | auto output_tensor = out_tensors_.at(0); | ||||
| output_ptr_ = MallocOutputFp16(output_tensor, context_); | |||||
| output_ptr_ = MallocOutputFp16(output_tensor, static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (input0_ptr_ == nullptr || input1_ptr_ == nullptr || output_ptr_ == nullptr) { | if (input0_ptr_ == nullptr || input1_ptr_ == nullptr || output_ptr_ == nullptr) { | ||||
| FreeFp16Buffer(); | FreeFp16Buffer(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRun, this, | |||||
| context_->thread_num_); | |||||
| if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) { | if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) { | ||||
| Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()), | Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()), | ||||
| output_tensor->ElementsNum()); | output_tensor->ElementsNum()); | ||||
| @@ -77,13 +77,14 @@ int ArithmeticSelfFp16CPUKernel::Run() { | |||||
| auto output_tensor = out_tensors_.at(0); | auto output_tensor = out_tensors_.at(0); | ||||
| if (input_tensor->data_type() == kNumberTypeFloat32) { | if (input_tensor->data_type() == kNumberTypeFloat32) { | ||||
| input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, context_); | |||||
| input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->context_)); | |||||
| } else { | } else { | ||||
| input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | ||||
| } | } | ||||
| output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticSelfRun, | |||||
| this, op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -51,15 +51,16 @@ int BatchnormFp16CPUKernel::InitConstTensor() { | |||||
| int BatchnormFp16CPUKernel::Run() { | int BatchnormFp16CPUKernel::Run() { | ||||
| auto input_tensor = in_tensors_.at(0); | auto input_tensor = in_tensors_.at(0); | ||||
| auto output_tensor = out_tensors_.at(0); | auto output_tensor = out_tensors_.at(0); | ||||
| input_ = ConvertInputFp32toFp16(input_tensor, context_); | |||||
| output_ = MallocOutputFp16(output_tensor, context_); | |||||
| input_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->context_)); | |||||
| output_ = MallocOutputFp16(output_tensor, static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (input_ == nullptr || output_ == nullptr) { | if (input_ == nullptr || output_ == nullptr) { | ||||
| FreeInputAndOutput(); | FreeInputAndOutput(); | ||||
| MS_LOG(ERROR) << "input or output is nullptr"; | MS_LOG(ERROR) << "input or output is nullptr"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -132,7 +132,8 @@ int CastFp16CPUKernel::Run() { | |||||
| if (data_num_ == 0) { | if (data_num_ == 0) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ParallelLaunch(this->context_->thread_pool_, CastFp16Run, this, op_parameter_->thread_num_); | |||||
| return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CastFp16Run, this, | |||||
| op_parameter_->thread_num_); | |||||
| } | } | ||||
| REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>) | REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>) | ||||
| @@ -236,14 +236,16 @@ int Convolution1x1FP16CPUKernel::Run() { | |||||
| int ret = RET_ERROR; | int ret = RET_ERROR; | ||||
| if (multi_thread_by_hw_) { | if (multi_thread_by_hw_) { | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| Convolution1x1Fp16RunHw, this, thread_count_); | |||||
| } else { | } else { | ||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | ||||
| #else | #else | ||||
| RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | ||||
| #endif | #endif | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| Convolution1x1Fp16RunOc, this, thread_count_); | |||||
| } | } | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ParallelLaunch failed."; | MS_LOG(ERROR) << "ParallelLaunch failed."; | ||||
| @@ -95,10 +95,11 @@ static void SetInputOutputShapeInfo(ConvParameter *conv_param, lite::Tensor *inp | |||||
| int ConvolutionDelegateFP16CPUKernel::ReSize() { | int ConvolutionDelegateFP16CPUKernel::ReSize() { | ||||
| // Update shape info of input and output | // Update shape info of input and output | ||||
| kernel::SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(), | kernel::SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(), | ||||
| out_tensors_.front(), context_); | |||||
| out_tensors_.front(), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (fp16_conv_kernel_ == nullptr) { | if (fp16_conv_kernel_ == nullptr) { | ||||
| fp16_conv_kernel_ = | fp16_conv_kernel_ = | ||||
| CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_, context_, origin_weight_, origin_bias_); | |||||
| CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_, | |||||
| static_cast<const lite::InnerContext *>(context_), origin_weight_, origin_bias_); | |||||
| if (fp16_conv_kernel_ == nullptr) { | if (fp16_conv_kernel_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Selecting execute kernel failed for conv_kernel, got a nullptr."; | MS_LOG(ERROR) << "Selecting execute kernel failed for conv_kernel, got a nullptr."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -184,7 +185,7 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor | |||||
| /* creator func */ | /* creator func */ | ||||
| kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | ||||
| const InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| MS_ASSERT(opParameter != nullptr); | MS_ASSERT(opParameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion); | ||||
| @@ -200,11 +201,12 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> & | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if (conv_param->group_ == 1) { | if (conv_param->group_ == 1) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs, | |||||
| static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | ||||
| kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx); | |||||
| kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else { | } else { | ||||
| kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx); | |||||
| kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -104,7 +104,8 @@ static int ConvDwFp16Run(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseFp16CPUKernel::Run() { | int ConvolutionDepthwiseFp16CPUKernel::Run() { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwFp16Run, this, | |||||
| conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -155,7 +155,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||||
| packed_output_ = output_ptr; | packed_output_ = output_ptr; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwSWFp16Run, this, | |||||
| conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -144,7 +144,8 @@ int ConvolutionFP16CPUKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionFp16Impl, this, | |||||
| thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]"; | MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]"; | ||||
| } | } | ||||
| @@ -213,7 +213,8 @@ int ConvolutionWinogradFP16CPUKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ConvolutionWinogradFp16Impl, this, thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; | MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -53,7 +53,8 @@ int CropFp16CPUKernel::Run() { | |||||
| input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | ||||
| output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, crop_para_->thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CropFp16Run, this, | |||||
| crop_para_->thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ParallelLaunch failed: " << ret; | MS_LOG(ERROR) << "ParallelLaunch failed: " << ret; | ||||
| } | } | ||||
| @@ -173,7 +173,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t)); | memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t)); | ||||
| packed_output_ = output_ptr; | packed_output_ = output_ptr; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeconvDwFp16Run, this, | |||||
| conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -217,7 +217,8 @@ int DeConvolutionFp16CPUKernel::Run() { | |||||
| RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_); | RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_); | ||||
| error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_); | |||||
| error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvFp16Run, | |||||
| this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]"; | MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]"; | ||||
| } | } | ||||
| @@ -229,7 +230,7 @@ int DeConvolutionFp16CPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | ||||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| MS_ASSERT(op_parameter != nullptr); | MS_ASSERT(op_parameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); | ||||
| @@ -238,12 +239,15 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> | |||||
| if (conv_param->group_ == 1) { | if (conv_param->group_ == 1) { | ||||
| if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && | if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && | ||||
| (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) { | (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) { | ||||
| kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, | |||||
| static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) | |||||
| kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } | } | ||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | ||||
| kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) | |||||
| DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -392,10 +392,12 @@ int DeConvWinogradFp16CPUKernel::Run() { | |||||
| nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; | nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; | ||||
| ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t)); | ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t)); | ||||
| ParallelLaunch(this->context_->thread_pool_, DeConvWgFp16Run, this, deconv_param_->thread_num_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgFp16Run, this, | |||||
| deconv_param_->thread_num_); | |||||
| /*post bias activate and nhwc */ | /*post bias activate and nhwc */ | ||||
| ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp16Run, this, thread_num_hw_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgPostFp16Run, this, | |||||
| thread_num_hw_); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -151,7 +151,8 @@ int GatherFp16CPUKernel::Run() { | |||||
| Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum()); | Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum()); | ||||
| } | } | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, GatherRunFp16, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherRunFp16, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -109,7 +109,8 @@ int InstanceNormFp16Run(void *cdata, int task_id) { | |||||
| int InstanceNormFp16CPUKernel::Run() { | int InstanceNormFp16CPUKernel::Run() { | ||||
| src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c()); | src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c()); | ||||
| dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c()); | dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormFp16Run, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, InstanceNormFp16Run, | |||||
| this, op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]"; | MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -95,7 +95,8 @@ int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id) { | |||||
| int LogSoftmaxFp16CPUKernel::Run() { | int LogSoftmaxFp16CPUKernel::Run() { | ||||
| if (in_plane_size_ == 1) { | if (in_plane_size_ == 1) { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, LogSoftmaxLastAxisFp16Run, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| LogSoftmaxLastAxisFp16Run, this, context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret; | MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret; | ||||
| } | } | ||||
| @@ -286,7 +286,8 @@ int MatmulBaseFP16CPUKernel::Run() { | |||||
| batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_; | batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_; | ||||
| batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_; | batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulBaseFP16Run, this, thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MatmulBaseFP16Run, | |||||
| this, thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "MatmulBaseFloatRun failed"; | MS_LOG(ERROR) << "MatmulBaseFloatRun failed"; | ||||
| return ret; | return ret; | ||||
| @@ -89,7 +89,8 @@ int PadFp16CPUKernel::Run() { | |||||
| output_[i] = pad_param_->constant_value_; | output_[i] = pad_param_->constant_value_; | ||||
| } | } | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -101,7 +102,8 @@ int PadFp16CPUKernel::Run() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl, this, | |||||
| context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]"; | MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -90,7 +90,8 @@ int PoolingFp16CPUKernel::Run() { | |||||
| fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c()); | ||||
| fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingFp16Impl, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| PoolingFp16Impl, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -87,7 +87,8 @@ int PowerFp16CPUKernel::Run() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImplFp16, this, thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PowerImplFp16, this, | |||||
| thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret; | MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -164,7 +164,8 @@ int QuantDTypeCastFp16CPUKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastFP16Run, this, thread_n_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| QuantDTypeCastFP16Run, this, thread_n_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -93,7 +93,8 @@ int ReduceFp16CPUKernel::Run() { | |||||
| outer_size_ = outer_sizes_.at(i); | outer_size_ = outer_sizes_.at(i); | ||||
| inner_size_ = inner_sizes_.at(i); | inner_size_ = inner_sizes_.at(i); | ||||
| axis_size_ = axis_sizes_.at(i); | axis_size_ = axis_sizes_.at(i); | ||||
| auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_); | |||||
| auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ReduceFp16Impl, this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | ||||
| @@ -108,7 +109,8 @@ int ReduceFp16CPUKernel::Run() { | |||||
| outer_size_ = outer_sizes_.back(); | outer_size_ = outer_sizes_.back(); | ||||
| inner_size_ = inner_sizes_.back(); | inner_size_ = inner_sizes_.back(); | ||||
| axis_size_ = axis_sizes_.back(); | axis_size_ = axis_sizes_.back(); | ||||
| auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_); | |||||
| auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ReduceFp16Impl, this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | ||||
| @@ -115,7 +115,8 @@ int ScaleFp16CPUKernel::Run() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ScaleFp16Run, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScaleFp16Run, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| @@ -127,12 +128,12 @@ int ScaleFp16CPUKernel::Run() { | |||||
| } | } | ||||
| int ScaleFp16CPUKernel::MallocAssignTmpBuffer() { | int ScaleFp16CPUKernel::MallocAssignTmpBuffer() { | ||||
| scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_); | |||||
| scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (scale_ == nullptr) { | if (scale_ == nullptr) { | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (in_tensors_.size() == 3) { | if (in_tensors_.size() == 3) { | ||||
| offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), context_); | |||||
| offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (offset_ == nullptr) { | if (offset_ == nullptr) { | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -63,7 +63,8 @@ int SliceFp16CPUKernel::Run() { | |||||
| DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16)); | DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16)); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SliceFp16Launch, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceFp16Launch, | |||||
| this, op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret; | MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -95,7 +95,8 @@ int SoftmaxLastAxisFp16Run(void *cdata, int task_id) { | |||||
| int SoftmaxFp16CPUKernel::Run() { | int SoftmaxFp16CPUKernel::Run() { | ||||
| if (in_plane_size_ == 1) { | if (in_plane_size_ == 1) { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisFp16Run, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| SoftmaxLastAxisFp16Run, this, context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret; | MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret; | ||||
| } | } | ||||
| @@ -40,14 +40,15 @@ void StackFp16CPUKernel::InitMallocFlags() { | |||||
| int StackFp16CPUKernel::MallocAssignBuffer() { | int StackFp16CPUKernel::MallocAssignBuffer() { | ||||
| buffers_.resize(in_tensors_.size(), nullptr); | buffers_.resize(in_tensors_.size(), nullptr); | ||||
| for (size_t i = 0; i < in_tensors_.size(); ++i) { | for (size_t i = 0; i < in_tensors_.size(); ++i) { | ||||
| buffers_.at(i) = reinterpret_cast<char *>(ConvertInputFp32toFp16(in_tensors_.at(i), context_)); | |||||
| buffers_.at(i) = reinterpret_cast<char *>( | |||||
| ConvertInputFp32toFp16(in_tensors_.at(i), static_cast<const lite::InnerContext *>(context_))); | |||||
| if (buffers_.at(i) == nullptr) { | if (buffers_.at(i) == nullptr) { | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| out_buffer_ = nullptr; | out_buffer_ = nullptr; | ||||
| out_buffer_ = MallocOutputFp16(out_tensors_.at(0), context_); | |||||
| out_buffer_ = MallocOutputFp16(out_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_)); | |||||
| if (out_buffer_ == nullptr) { | if (out_buffer_ == nullptr) { | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -100,7 +101,8 @@ int StackFp16CPUKernel::Run() { | |||||
| } | } | ||||
| // run stack | // run stack | ||||
| num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_); | num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_); | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, StackRun, this, num_threads_); | |||||
| ret = | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StackRun, this, num_threads_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -79,7 +79,8 @@ int ActivationGradRunFp16(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ActivationGradCPUKernelFp16::Run() { | int ActivationGradCPUKernelFp16::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRunFp16, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ActivationGradRunFp16, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -73,7 +73,8 @@ int ArithmeticSelfGradFp16Run(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ArithmeticSelfGradFp16CPUKernel::Run() { | int ArithmeticSelfGradFp16CPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfGradFp16Run, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ArithmeticSelfGradFp16Run, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -101,7 +101,8 @@ int ActivationRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ActivationCPUKernel::Run() { | int ActivationCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ActivationRun, | |||||
| this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -122,7 +122,8 @@ int AdderCPUKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, AdderImpl, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AdderImpl, | |||||
| this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "adder error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "adder error error_code[" << error_code << "]"; | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| @@ -89,7 +89,8 @@ int AddNCPUKernel::Run() { | |||||
| in1_addr_ = input0_data; | in1_addr_ = input0_data; | ||||
| in2_addr_ = input1_data; | in2_addr_ = input1_data; | ||||
| out_addr_ = output_data; | out_addr_ = output_data; | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AddNLaunch, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "addn launch fail!ret: " << ret; | MS_LOG(ERROR) << "addn launch fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -97,7 +98,8 @@ int AddNCPUKernel::Run() { | |||||
| for (size_t i = 2; i < in_tensors_.size(); ++i) { | for (size_t i = 2; i < in_tensors_.size(); ++i) { | ||||
| in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData()); | in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData()); | ||||
| in2_addr_ = output_data; | in2_addr_ = output_data; | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AddNLaunch, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i; | MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -418,7 +418,8 @@ int ArithmeticCPUKernel::Run() { | |||||
| input1_ptr_ = in_tensors_[1]->data_c(); | input1_ptr_ = in_tensors_[1]->data_c(); | ||||
| } | } | ||||
| output_ptr_ = out_tensors_[0]->data_c(); | output_ptr_ = out_tensors_[0]->data_c(); | ||||
| return ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, context_->thread_num_); | |||||
| return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRun, this, | |||||
| context_->thread_num_); | |||||
| } | } | ||||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>) | REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>) | ||||
| @@ -113,7 +113,8 @@ int ArithmeticSelfRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ArithmeticSelfCPUKernel::Run() { | int ArithmeticSelfCPUKernel::Run() { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticSelfRun, | |||||
| this, op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -75,7 +75,8 @@ int BatchnormCPUKernel::InitConstTensor() { | |||||
| } | } | ||||
| int BatchnormCPUKernel::Run() { | int BatchnormCPUKernel::Run() { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -141,7 +141,8 @@ int CastCPUKernel::Run() { | |||||
| if (data_num_ == 0) { | if (data_num_ == 0) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ParallelLaunch(this->context_->thread_pool_, CastRun, this, op_parameter_->thread_num_); | |||||
| return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CastRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| } | } | ||||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>) | REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>) | ||||
| @@ -69,7 +69,8 @@ int ConcatRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ConcatCPUKernel::Run() { | int ConcatCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ConcatRun, this, op_parameter_->thread_num_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConcatRun, | |||||
| this, op_parameter_->thread_num_); | |||||
| return error_code; | return error_code; | ||||
| } | } | ||||
| @@ -247,10 +247,12 @@ int Convolution1x1CPUKernel::Run() { | |||||
| } | } | ||||
| if (multi_thread_by_hw_) { | if (multi_thread_by_hw_) { | ||||
| ParallelLaunch(this->context_->thread_pool_, Convolution1x1RunHw, this, thread_count_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, Convolution1x1RunHw, this, | |||||
| thread_count_); | |||||
| } else { | } else { | ||||
| PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | ||||
| ParallelLaunch(this->context_->thread_pool_, Convolution1x1Run, this, thread_count_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, Convolution1x1Run, this, | |||||
| thread_count_); | |||||
| } | } | ||||
| } | } | ||||
| @@ -138,16 +138,19 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() { | |||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_); | auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_); | ||||
| if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { | if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { | ||||
| kernel = new (std::nothrow) | |||||
| kernel::Convolution1x1CPUKernel(op_parameter_, in_tensors_, out_tensors_, context_, origin_weight_, origin_bias_); | |||||
| kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(op_parameter_, in_tensors_, out_tensors_, | |||||
| static_cast<const lite::InnerContext *>(this->context_), | |||||
| origin_weight_, origin_bias_); | |||||
| } else { | } else { | ||||
| int out_unit; | int out_unit; | ||||
| if (CheckIfUseWinograd(&out_unit, conv_param)) { | if (CheckIfUseWinograd(&out_unit, conv_param)) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel( | kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel( | ||||
| op_parameter_, in_tensors_, out_tensors_, context_, out_unit, origin_weight_, origin_bias_); | |||||
| op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->context_), out_unit, | |||||
| origin_weight_, origin_bias_); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) | |||||
| kernel::ConvolutionCPUKernel(op_parameter_, in_tensors_, out_tensors_, context_, origin_weight_, origin_bias_); | |||||
| kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(op_parameter_, in_tensors_, out_tensors_, | |||||
| static_cast<const lite::InnerContext *>(this->context_), | |||||
| origin_weight_, origin_bias_); | |||||
| } | } | ||||
| } | } | ||||
| @@ -214,7 +217,7 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor | |||||
| /* creator func */ | /* creator func */ | ||||
| kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | ||||
| const InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| MS_ASSERT(op_parameter != nullptr); | MS_ASSERT(op_parameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion); | ||||
| MS_ASSERT(desc.data_type == kNumberTypeFloat32); | MS_ASSERT(desc.data_type == kNumberTypeFloat32); | ||||
| @@ -222,11 +225,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> & | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if (conv_param->group_ == 1) { | if (conv_param->group_ == 1) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) | |||||
| kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | ||||
| kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, ctx); | |||||
| kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else { | } else { | ||||
| kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, ctx); | |||||
| kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -126,7 +126,8 @@ int ConvolutionDepthwise3x3CPUKernel::Run() { | |||||
| auto output_tensor = out_tensors_.at(kOutputIndex); | auto output_tensor = out_tensors_.at(kOutputIndex); | ||||
| output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c()); | output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDw3x3Run, this, conv_param_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDw3x3Run, this, | |||||
| conv_param_->thread_num_); | |||||
| ctx_->allocator->Free(buffer_); | ctx_->allocator->Free(buffer_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]"; | ||||
| @@ -107,7 +107,8 @@ int ConvolutionDepthwiseCPUKernel::Run() { | |||||
| auto output_tensor = out_tensors_.at(kOutputIndex); | auto output_tensor = out_tensors_.at(kOutputIndex); | ||||
| output_ptr_ = reinterpret_cast<float *>(output_tensor->MutableData()); | output_ptr_ = reinterpret_cast<float *>(output_tensor->MutableData()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwRun, this, | |||||
| conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -194,7 +194,8 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() { | |||||
| ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w); | ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwIndirectRun, this, conv_param_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwIndirectRun, | |||||
| this, conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -163,7 +163,8 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { | |||||
| packed_output_ = output_ptr; | packed_output_ = output_ptr; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwSWRun, this, | |||||
| conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -152,7 +152,8 @@ int ConvolutionCPUKernel::Run() { | |||||
| PackWeight(); | PackWeight(); | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionImpl, this, | |||||
| thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "conv error error_code[" << ret << "]"; | MS_LOG(ERROR) << "conv error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -219,7 +219,8 @@ int ConvolutionWinogradCPUKernel::Run() { | |||||
| InitWeightBias(); | InitWeightBias(); | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionWinogradImpl, | |||||
| this, thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; | MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -151,7 +151,8 @@ int CropAndResizeCPUKernel::Run() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, CropAndResizeImpl, this, context_->thread_num_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| CropAndResizeImpl, this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]"; | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| @@ -62,7 +62,8 @@ int CropCPUKernel::Run() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, CropLaunch, this, crop_para_->thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CropLaunch, this, | |||||
| crop_para_->thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; | MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -168,7 +168,8 @@ int DeconvolutionDepthwiseCPUKernel::Run() { | |||||
| packed_output_ = output_addr; | packed_output_ = output_addr; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeconvDwRun, this, | |||||
| conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -222,7 +222,8 @@ int DeConvolutionCPUKernel::Run() { | |||||
| RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_); | ||||
| #endif | #endif | ||||
| error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_); | |||||
| error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvFp32Run, | |||||
| this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; | MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; | ||||
| FreeRunBuf(); | FreeRunBuf(); | ||||
| @@ -236,7 +237,7 @@ int DeConvolutionCPUKernel::Run() { | |||||
| kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | ||||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| MS_ASSERT(op_parameter != nullptr); | MS_ASSERT(op_parameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); | ||||
| @@ -245,12 +246,15 @@ kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *> | |||||
| if (conv_param->group_ == 1) { | if (conv_param->group_ == 1) { | ||||
| if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && | if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && | ||||
| (conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) { | (conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) { | ||||
| kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs, | |||||
| static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) | |||||
| kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| } | } | ||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | ||||
| kernel = new (std::nothrow) kernel::DeconvolutionDepthwiseCPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) kernel::DeconvolutionDepthwiseCPUKernel(op_parameter, inputs, outputs, | |||||
| static_cast<const lite::InnerContext *>(ctx)); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "deconv do not support group deconv!"; | MS_LOG(ERROR) << "deconv do not support group deconv!"; | ||||
| kernel = nullptr; | kernel = nullptr; | ||||
| @@ -411,10 +411,12 @@ int DeConvolutionWinogradCPUKernel::Run() { | |||||
| nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; | nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; | ||||
| ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float)); | ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float)); | ||||
| ParallelLaunch(this->context_->thread_pool_, DeConvWgFp32Run, this, deconv_param_->thread_num_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgFp32Run, this, | |||||
| deconv_param_->thread_num_); | |||||
| /*post bias activate and nhwc */ | /*post bias activate and nhwc */ | ||||
| ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp32Run, this, thread_num_hw_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgPostFp32Run, this, | |||||
| thread_num_hw_); | |||||
| } | } | ||||
| FreeRunBuf(); | FreeRunBuf(); | ||||
| @@ -55,7 +55,8 @@ int EluRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int EluCPUKernel::Run() { | int EluCPUKernel::Run() { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, EluRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -87,7 +87,8 @@ int EmbeddingLookupCPUKernel::Run() { | |||||
| memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum()); | memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum()); | ||||
| dest_loc += in_tensors_.at(i)->ElementsNum(); | dest_loc += in_tensors_.at(i)->ElementsNum(); | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, EmbeddingLookupRun, | |||||
| this, op_parameter_->thread_num_); | |||||
| FreeRunBuff(); | FreeRunBuff(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]"; | ||||
| @@ -73,7 +73,8 @@ int ExpCPUKernel::Run() { | |||||
| output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | ||||
| exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum(); | exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum(); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ExpRun, this, exp_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ExpRun, this, | |||||
| exp_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -91,7 +91,8 @@ int FillCPUKernel::Run() { | |||||
| MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type(); | MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, FillRun, this, thread_sz_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, FillRun, this, | |||||
| thread_sz_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -93,7 +93,8 @@ int FusedBatchnormCPUKernel::Run() { | |||||
| trained_ = true; // trained at least once | trained_ = true; // trained at least once | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -128,7 +128,8 @@ int GatherNdCPUKernel::Run() { | |||||
| in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | ||||
| out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | ||||
| InitOffset(); | InitOffset(); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdRun, this, thread_sz_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherNdRun, this, | |||||
| thread_sz_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]"; | MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -92,7 +92,8 @@ int GatherCPUKernel::Run() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, GatherRun, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -66,7 +66,8 @@ int InstanceNormCPUKernel::Run() { | |||||
| gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c()); | gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c()); | ||||
| beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c()); | beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c()); | ||||
| dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, InstanceNormRun, | |||||
| this, op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]"; | MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -146,7 +146,8 @@ int L2NormCPUKernel::Run() { | |||||
| output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); | ||||
| if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) { | if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) { | ||||
| // all axis | // all axis | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SquareSumRun, this, | |||||
| context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -156,13 +157,15 @@ int L2NormCPUKernel::Run() { | |||||
| sum += tmp_sum_[i]; | sum += tmp_sum_[i]; | ||||
| } | } | ||||
| sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_); | sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_); | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, L2NormRun, this, context_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, L2NormRun, this, | |||||
| context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) { | } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| L2NormTrailingAxisRun, this, context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -92,7 +92,8 @@ int LayerNormCPUKernel::Run() { | |||||
| mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float))); | mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float))); | ||||
| var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float))); | var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float))); | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LayerNormRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (out_tensors_.size() != 3) { | if (out_tensors_.size() != 3) { | ||||
| context_->allocator->Free(mean_data_); | context_->allocator->Free(mean_data_); | ||||
| context_->allocator->Free(var_data_); | context_->allocator->Free(var_data_); | ||||
| @@ -74,7 +74,8 @@ int LocalResponseNormRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int LocalResponseNormCPUKernel::Run() { | int LocalResponseNormCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, LocalResponseNormRun, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| LocalResponseNormRun, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -96,7 +96,8 @@ int LogSoftmaxLastAxisRun(void *cdata, int task_id) { | |||||
| int LogSoftmaxCPUKernel::Run() { | int LogSoftmaxCPUKernel::Run() { | ||||
| int ret = RET_OK; | int ret = RET_OK; | ||||
| if (in_plane_size_ == 1) { | if (in_plane_size_ == 1) { | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, LogSoftmaxLastAxisRun, this, context_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LogSoftmaxLastAxisRun, | |||||
| this, context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret; | MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret; | ||||
| } | } | ||||
| @@ -61,7 +61,8 @@ int LshProjectionCPUKernel::Run() { | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, op_parameter_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LshProjectionRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "LshProjection kernel parallel launch failed"; | MS_LOG(ERROR) << "LshProjection kernel parallel launch failed"; | ||||
| } | } | ||||
| @@ -332,7 +332,8 @@ int MatmulFp32BaseCPUKernel::Run() { | |||||
| batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_; | batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_; | ||||
| batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_; | batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulBaseFloatRun, this, thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MatmulBaseFloatRun, | |||||
| this, thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "MatmulBaseFloatRun failed"; | MS_LOG(ERROR) << "MatmulBaseFloatRun failed"; | ||||
| return ret; | return ret; | ||||
| @@ -181,7 +181,8 @@ int OneHotCPUKernel::GetParams() { | |||||
| } | } | ||||
| int OneHotCPUKernel::Run() { | int OneHotCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, RunOneHot, this, context_->thread_num_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, RunOneHot, | |||||
| this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -395,7 +395,8 @@ int PadCPUKernel::Run() { | |||||
| output_data[i] = pad_param_->constant_value_; | output_data[i] = pad_param_->constant_value_; | ||||
| } | } | ||||
| } | } | ||||
| error_code = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, context_->thread_num_); | |||||
| error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this, | |||||
| context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -408,7 +409,8 @@ int PadCPUKernel::Run() { | |||||
| return error_code; | return error_code; | ||||
| } | } | ||||
| error_code = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_); | |||||
| error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl, | |||||
| this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -85,7 +85,8 @@ int PoolingImpl(void *cdata, int task_id) { | |||||
| } | } | ||||
| int PoolingCPUKernel::Run() { | int PoolingCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingImpl, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PoolingImpl, | |||||
| this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -41,7 +41,8 @@ int PowerImpl(void *cdata, int task_id) { | |||||
| } | } | ||||
| int PowerCPUKernel::Run() { | int PowerCPUKernel::Run() { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImpl, this, thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PowerImpl, this, | |||||
| thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "PowerCPUKernel error: " << ret; | MS_LOG(ERROR) << "PowerCPUKernel error: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -93,7 +93,8 @@ int PReluCPUKernel::Run() { | |||||
| auto negative_slope_tensor = in_tensors_.at(1); | auto negative_slope_tensor = in_tensors_.at(1); | ||||
| prelu_param_->slope_ = reinterpret_cast<float *>(negative_slope_tensor->data_c()); | prelu_param_->slope_ = reinterpret_cast<float *>(negative_slope_tensor->data_c()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, PReluRun, this, prelu_param_->op_parameter_.thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PReluRun, this, | |||||
| prelu_param_->op_parameter_.thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "PRelu Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "PRelu Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -117,7 +117,8 @@ int ReduceCPUKernel::Run() { | |||||
| outer_size_ = outer_sizes_.at(i); | outer_size_ = outer_sizes_.at(i); | ||||
| inner_size_ = inner_sizes_.at(i); | inner_size_ = inner_sizes_.at(i); | ||||
| axis_size_ = axis_sizes_.at(i); | axis_size_ = axis_sizes_.at(i); | ||||
| auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceImpl, this, context_->thread_num_); | |||||
| auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReduceImpl, | |||||
| this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| @@ -205,7 +205,8 @@ int ResizeCPUKernel::RunImpl(int task_id) { | |||||
| } | } | ||||
| int ResizeCPUKernel::Run() { | int ResizeCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeImpl, this, context_->thread_num_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ResizeImpl, | |||||
| this, context_->thread_num_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| @@ -129,7 +129,8 @@ int ReverseCPUKernel::DoReverse(int task_id) { | |||||
| int ReverseCPUKernel::Run() { | int ReverseCPUKernel::Run() { | ||||
| in_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | in_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); | ||||
| out_ptr_ = reinterpret_cast<float *>(out_tensors_[0]->MutableData()); | out_ptr_ = reinterpret_cast<float *>(out_tensors_[0]->MutableData()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReverseRun, this, | |||||
| thread_sz_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -101,7 +101,8 @@ int ROIPoolingCPUKernel::Run() { | |||||
| in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData()); | ||||
| out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData()); | ||||
| roi_ptr_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | roi_ptr_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ROIPoolingRun, this, | |||||
| param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -188,7 +188,8 @@ int ScaleCPUKernel::Run() { | |||||
| auto out_tensor = out_tensors_.front(); | auto out_tensor = out_tensors_.front(); | ||||
| output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData()); | output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData()); | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScaleRun, this, | |||||
| op_parameter_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -149,7 +149,8 @@ int ScatterNDRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ScatterNDCPUKernel::Run() { | int ScatterNDCPUKernel::Run() { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, ScatterNDRun, this, thread_n_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScatterNDRun, this, | |||||
| thread_n_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ScatterND error error_code[" << ret << "]"; | MS_LOG(ERROR) << "ScatterND error error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -96,7 +96,8 @@ int SoftmaxLastAxisRun(void *cdata, int task_id) { | |||||
| int SoftmaxCPUKernel::Run() { | int SoftmaxCPUKernel::Run() { | ||||
| int ret = RET_OK; | int ret = RET_OK; | ||||
| if (in_plane_size_ == 1) { | if (in_plane_size_ == 1) { | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisRun, this, context_->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SoftmaxLastAxisRun, | |||||
| this, context_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "SoftmaxCPUKernel ParallelLaunch failed, ret: " << ret; | MS_LOG(ERROR) << "SoftmaxCPUKernel ParallelLaunch failed, ret: " << ret; | ||||
| } | } | ||||
| @@ -102,7 +102,8 @@ int SpaceToBatchCPUKernel::Run() { | |||||
| } | } | ||||
| } | } | ||||
| ParallelLaunch(this->context_->thread_pool_, SpaceToBatchFp32Run, this, op_parameter_->thread_num_); | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToBatchFp32Run, this, | |||||
| op_parameter_->thread_num_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -93,7 +93,8 @@ int SpaceToDepthCPUKernel::Run() { | |||||
| input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->data_c()); | input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->data_c()); | ||||
| output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); | ||||
| if (in_tensors_.at(0)->format() == schema::Format::Format_NHWC) { | if (in_tensors_.at(0)->format() == schema::Format::Format_NHWC) { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, SpaceToDepthRun, this, thread_h_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToDepthRun, | |||||
| this, thread_h_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]"; | MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]"; | ||||
| return ret; | return ret; | ||||
| @@ -175,7 +175,8 @@ int SparseToDenseCPUKernel::Run() { | |||||
| } | } | ||||
| output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | ||||
| count_unit_ = thread_count_ > 1 ? UP_DIV(index_num, thread_count_) : index_num; | count_unit_ = thread_count_ > 1 ? UP_DIV(index_num, thread_count_) : index_num; | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, SparseToDenseRun, this, s2d_param->thread_num_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SparseToDenseRun, this, | |||||
| s2d_param->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "SparseToDenseRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "SparseToDenseRun error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -159,7 +159,8 @@ int TransposeCPUKernel::Run() { | |||||
| thread_count_ = op_parameter_->thread_num_; | thread_count_ = op_parameter_->thread_num_; | ||||
| GetNHNCTransposeFunc(in_tensor, out_tensor, param_); | GetNHNCTransposeFunc(in_tensor, out_tensor, param_); | ||||
| if (NHNCTransposeFunc_ != nullptr) { | if (NHNCTransposeFunc_ != nullptr) { | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, TransposeImpl, this, thread_count_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, TransposeImpl, | |||||
| this, thread_count_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "NHNCTransposeFunc_ is error!"; | MS_LOG(ERROR) << "NHNCTransposeFunc_ is error!"; | ||||
| } | } | ||||
| @@ -187,7 +188,8 @@ int TransposeCPUKernel::Run() { | |||||
| } | } | ||||
| int ret; | int ret; | ||||
| if (dims_ > MAX_TRANSPOSE_DIM_SIZE) { | if (dims_ > MAX_TRANSPOSE_DIM_SIZE) { | ||||
| ret = ParallelLaunch(this->context_->thread_pool_, TransposeImpl, this, thread_count_); | |||||
| ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, TransposeImpl, this, | |||||
| thread_count_); | |||||
| } else { | } else { | ||||
| ret = DoTransposeFp32(in_data_, out_data_, out_shape_, param_); | ret = DoTransposeFp32(in_data_, out_data_, out_shape_, param_); | ||||
| } | } | ||||
| @@ -133,7 +133,8 @@ int WhereCPUKernel::RunWithTripleInputs() { | |||||
| MS_LOG(ERROR) << "Error, inputs' length are zero !!!"; | MS_LOG(ERROR) << "Error, inputs' length are zero !!!"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ParallelLaunch(this->context_->thread_pool_, WhereRun, this, where_param_->thread_num_); | |||||
| auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, WhereRun, this, | |||||
| where_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -98,7 +98,8 @@ int ActivationGradRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ActivationGradCPUKernel::Run() { | int ActivationGradCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRun, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ActivationGradRun, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -102,7 +102,8 @@ int AdamRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int AdamCPUKernel::Run() { | int AdamCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, AdamRun, this, thread_count_); | |||||
| int error_code = | |||||
| ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AdamRun, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Adam function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Adam function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -145,9 +146,10 @@ int AdamCPUKernel::OptimizerStep() { | |||||
| kernel::LiteKernel *CpuAdamFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuAdamFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | ||||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Adam); | MS_ASSERT(desc.type == schema::PrimitiveType_Adam); | ||||
| auto *kernel = new (std::nothrow) AdamCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = | |||||
| new (std::nothrow) AdamCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new AdamCPUKernel fail!"; | MS_LOG(ERROR) << "new AdamCPUKernel fail!"; | ||||
| free(opParameter); | free(opParameter); | ||||
| @@ -82,7 +82,8 @@ int ApplyMomentumRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ApplyMomentumCPUKernel::Run() { | int ApplyMomentumCPUKernel::Run() { | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ApplyMomentumRun, this, thread_count_); | |||||
| int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, | |||||
| ApplyMomentumRun, this, thread_count_); | |||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "Apply Momentum function error error_code[" << error_code << "]"; | MS_LOG(ERROR) << "Apply Momentum function error error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -119,10 +120,11 @@ int ApplyMomentumCPUKernel::OptimizerStep() { | |||||
| kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| OpParameter *opParameter, const lite::InnerContext *ctx, | |||||
| OpParameter *opParameter, const lite::Context *ctx, | |||||
| const kernel::KernelKey &desc) { | const kernel::KernelKey &desc) { | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_ApplyMomentum); | MS_ASSERT(desc.type == schema::PrimitiveType_ApplyMomentum); | ||||
| auto *kernel = new (std::nothrow) ApplyMomentumCPUKernel(opParameter, inputs, outputs, ctx); | |||||
| auto *kernel = new (std::nothrow) | |||||
| ApplyMomentumCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "new ApplyMomentumCPUKernel fail!"; | MS_LOG(ERROR) << "new ApplyMomentumCPUKernel fail!"; | ||||
| free(opParameter); | free(opParameter); | ||||