diff --git a/mindspore/lite/nnacl/fp32/elu_fp32.c b/mindspore/lite/nnacl/fp32/elu_fp32.c index 87a3e27e22..aed9283eec 100644 --- a/mindspore/lite/nnacl/fp32/elu_fp32.c +++ b/mindspore/lite/nnacl/fp32/elu_fp32.c @@ -23,7 +23,7 @@ void Calculate_Data(const float *input_data, float *output_data, int num, EluPar } int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) { - for (size_t i = task_id; i < parameter->in_size_; i += parameter->thread_num_) { + for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) { Calculate_Data(input_data, output_data, i, parameter); } return NNACL_OK; diff --git a/mindspore/lite/nnacl/fp32/elu_fp32.h b/mindspore/lite/nnacl/fp32/elu_fp32.h index d60826135f..2a941ee7c5 100644 --- a/mindspore/lite/nnacl/fp32/elu_fp32.h +++ b/mindspore/lite/nnacl/fp32/elu_fp32.h @@ -22,7 +22,6 @@ typedef struct EluParameter { OpParameter op_parameter_; float alpha_; - int thread_num_; int in_size_; } EluParameter; diff --git a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c index 858700b856..7abb2948fa 100644 --- a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c +++ b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c @@ -47,7 +47,7 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding } int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) { - for (size_t i = task_id; i < parameter->ids_size_; i += parameter->thread_num) { + for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) { int ret = CopyData(input_data, ids, output_data, i, parameter); if (ret != NNACL_OK) { return ret; diff --git a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h index 0eeda37185..e984ee5c84 100644 --- a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h +++ b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h @@ -26,7 +26,6 @@ typedef struct EmbeddingLookupParameter { int ids_size_; int layer_size_; int layer_num_; - int thread_num; } EmbeddingLookupParameter; #ifdef __cplusplus diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index 3844464cd9..0de96878ec 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -40,7 +40,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() { } } -int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() { +int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C8NUM != 0) { need_align_ = true; int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); @@ -142,19 +142,17 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) { } int ConvolutionDepthwiseSWFp16CPUKernel::Run() { - auto ret = InitBuffer(); + auto ret = InitPackedInputOutput(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); + MS_LOG(ERROR) << "Convolution depthwise fp16 InitPackedInputOutput failed."; + FreePackedInputOutput(); return ret; } ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); if (ret != RET_OK) { MS_LOG(ERROR) << "Get Execute tensor failed."; - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); + FreePackedInputOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); return ret; } @@ -173,11 +171,19 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { if (need_align_) { PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); } ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return RET_OK; + FreePackedInputOutput(); + return ret; +} + +void ConvolutionDepthwiseSWFp16CPUKernel::FreePackedInputOutput() { + if (need_align_) { + context_->allocator->Free(packed_input_); + context_->allocator->Free(packed_output_); + packed_input_ = nullptr; + packed_output_ = nullptr; + } } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h index 15e1169ed5..7f44731930 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h @@ -45,11 +45,12 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel int ReSize() override; int Run() override; - int InitBuffer(); + int InitPackedInputOutput(); int InitWeightBias(); int Execute(int task_id); private: + void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 44c97598f3..54bceb7514 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -53,7 +53,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { return RET_OK; } -int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() { +int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C8NUM != 0) { need_align_ = true; int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); @@ -156,19 +156,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { MS_LOG(ERROR) << "Only support input channel equals output channel."; return RET_ERROR; } - auto ret = InitBuffer(); + auto ret = InitPackedInputOutput(); if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); + MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitPackedInputOutput failed."; + FreePackedInputOutput(); return RET_ERROR; } ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); if (ret != RET_OK) { MS_LOG(ERROR) << "Get Execute tensor failed."; - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); + FreePackedInputOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); return ret; } @@ -191,14 +189,22 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { if (need_align_) { PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); } ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); + FreePackedInputOutput(); return ret; } +void DeconvolutionDepthwiseFp16CPUKernel::FreePackedInputOutput() { + if (need_align_) { + context_->allocator->Free(packed_input_); + context_->allocator->Free(packed_output_); + packed_input_ = nullptr; + packed_output_ = nullptr; + } +} + kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index f301fd0a40..71f81d5e98 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -46,12 +46,13 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel int ReSize() override; int Run() override; - int InitBuffer(); + int InitPackedInputOutput(); int InitWeightBias(); int InitSlideParam(); int Execute(int task_id); private: + void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 5ed33e0d18..e886b3da71 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -183,7 +183,7 @@ int DeConvolutionFp16CPUKernel::Run() { int error_code = InitRunBuf(); if (error_code != RET_OK) { - MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; + MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]"; ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); FreeRunBuf(); return RET_ERROR; @@ -197,7 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() { error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_); if (error_code != RET_OK) { - MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; + MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]"; } } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc index e15d1a5502..267196f68a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc @@ -70,7 +70,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() { return RET_OK; } -int ConvolutionDepthwiseSWCPUKernel::InitBuffer() { +int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C4NUM != 0) { need_align_ = true; int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); @@ -134,9 +134,10 @@ int ConvDwSWRun(void *cdata, int task_id) { } int ConvolutionDepthwiseSWCPUKernel::Run() { - auto ret = InitBuffer(); + auto ret = InitPackedInputOutput(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; + MS_LOG(ERROR) << "Convolution depthwise fp32 InitPackedInputOutput failed."; + FreePackedInputOutput(); return RET_ERROR; } auto input_tensor = in_tensors_.at(kInputIndex); @@ -159,16 +160,22 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]"; - return RET_ERROR; } if (need_align_) { PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + } + FreePackedInputOutput(); + return ret; +} + +void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() { + if (need_align_) { context_->allocator->Free(packed_input_); context_->allocator->Free(packed_output_); + packed_input_ = nullptr; + packed_output_ = nullptr; } - - return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h index ac40964498..12c8cbc1dc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h @@ -35,11 +35,12 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel { int ReSize() override; int Run() override; - int InitBuffer(); int InitWeightBias(); int Execute(int task_id); private: + int InitPackedInputOutput(); + void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; float *packed_weight_ = nullptr; float *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc index 3418c404a6..ae12237fba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc @@ -146,21 +146,20 @@ int ConvolutionCPUKernel::Run() { auto ret = InitTmpBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init tmp buffer failed."; + FreeTmpBuffer(); return RET_ERROR; } - int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "conv error error_code[" << error_code << "]"; - FreeTmpBuffer(); - return RET_ERROR; + ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "conv error error_code[" << ret << "]"; } FreeTmpBuffer(); - return RET_OK; + return ret; } ConvParameter *CreateNewConvParameter(ConvParameter *parameter) { - auto conv_parameter = reinterpret_cast(malloc(sizeof(ConvParameter))); + auto conv_parameter = new (std::nothrow) ConvParameter; if (conv_parameter == nullptr) { MS_LOG(ERROR) << "Malloc new conv parameter failed."; return nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc index 121b7797d5..bfb91c49ff 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc @@ -222,17 +222,16 @@ int ConvolutionWinogradCPUKernel::Run() { auto ret = InitTmpBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init tmp buffer failed."; + FreeTmpBuffer(); return RET_ERROR; } - int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]"; - FreeTmpBuffer(); - return RET_ERROR; + ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; } FreeTmpBuffer(); - return RET_OK; + return ret; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc index 5af04ba8e7..5897492979 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc @@ -82,7 +82,7 @@ int DeconvolutionDepthwiseCPUKernel::InitWeightBias() { return RET_OK; } -int DeconvolutionDepthwiseCPUKernel::InitBuffer() { +int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C4NUM != 0) { need_align_ = true; int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); @@ -151,9 +151,10 @@ int DeconvolutionDepthwiseCPUKernel::Run() { return RET_ERROR; } - auto ret = InitBuffer(); + auto ret = InitPackedInputOutput(); if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; + MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitPackedInputOutput failed.ret: " << ret; + FreePackedInputOutput(); return ret; } @@ -176,16 +177,23 @@ int DeconvolutionDepthwiseCPUKernel::Run() { ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; - return RET_ERROR; } if (need_align_) { PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + } + FreePackedInputOutput(); + return ret; +} + +void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() { + if (need_align_) { context_->allocator->Free(packed_input_); context_->allocator->Free(packed_output_); + packed_input_ = nullptr; + packed_output_ = nullptr; } - return RET_OK; } kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h index 350584a2b7..4b48db40f1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h @@ -36,11 +36,12 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { int ReSize() override; int Run() override; - int InitBuffer(); int InitWeightBias(); int Execute(int task_id); private: + int InitPackedInputOutput(); + void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; float *packed_weight_ = nullptr; float *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc index 0f027fe238..a8654ddc31 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc @@ -202,6 +202,7 @@ int DeConvolutionCPUKernel::Run() { int error_code = InitRunBuf(); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; + FreeRunBuf(); return error_code; } @@ -218,6 +219,7 @@ int DeConvolutionCPUKernel::Run() { error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; + FreeRunBuf(); return error_code; } } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc index f6392aaae1..b5aa53bd6e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc @@ -390,6 +390,7 @@ int DeConvolutionWinogradCPUKernel::Run() { auto ret = InitRunBuf(); if (ret != RET_OK) { MS_LOG(ERROR) << "InitRunBuf fail!ret: " << ret; + FreeRunBuf(); return ret; } @@ -410,5 +411,4 @@ int DeConvolutionWinogradCPUKernel::Run() { FreeRunBuf(); return RET_OK; } - } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc index 60d99118d4..3b0642f1fe 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc @@ -26,13 +26,9 @@ using mindspore::schema::PrimitiveType_Elu; namespace mindspore::kernel { int EluCPUKernel::Init() { - elu_parameter_ = reinterpret_cast(op_parameter_); - elu_parameter_->thread_num_ = thread_count_; - if (!InferShapeDone()) { return RET_OK; } - return ReSize(); } @@ -42,6 +38,8 @@ int EluCPUKernel::ReSize() { } int EluCPUKernel::DoExcute(int task_id) { + auto input_addr = reinterpret_cast(in_tensors_.front()->MutableData()); + auto output_addr = reinterpret_cast(out_tensors_.front()->MutableData()); Elu(input_addr, output_addr, elu_parameter_, task_id); return RET_OK; } @@ -57,10 +55,7 @@ int EluRun(void *cdata, int task_id) { } int EluCPUKernel::Run() { - input_addr = reinterpret_cast(in_tensors_.front()->MutableData()); - output_addr = reinterpret_cast(out_tensors_.front()->MutableData()); - - auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, elu_parameter_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]"; return RET_ERROR; @@ -72,16 +67,6 @@ kernel::LiteKernel *CpuEluFp32KernelCreator(const std::vector &i const std::vector &outputs, OpParameter *parameter, const lite::InnerContext *ctx, const KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { - if (parameter == nullptr) { - MS_LOG(ERROR) << "parameter is nullptr"; - return nullptr; - } - if (ctx == nullptr) { - MS_LOG(ERROR) << "ctx is nullptr"; - free(parameter); - return nullptr; - } - MS_ASSERT(desc.type == PrimitiveType_Elu); auto *kernel = new (std::nothrow) EluCPUKernel(parameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { MS_LOG(ERROR) << "Create Kernel failed, name: " << parameter->name_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h index ced5262679..a0dfc066d6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h @@ -24,25 +24,21 @@ namespace mindspore::kernel { class EluCPUKernel : public LiteKernel { public: - explicit EluCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx, - const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {} - ~EluCPUKernel() override{}; + EluCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive) { + elu_parameter_ = reinterpret_cast(op_parameter_); + } + ~EluCPUKernel() = default; int Init() override; int ReSize() override; int Run() override; int DoExcute(int task_id); - protected: - const lite::InnerContext *ctx_ = nullptr; - int thread_count_ = 1; - EluParameter *elu_parameter_ = nullptr; - private: - float *input_addr = nullptr; - float *output_addr = nullptr; + EluParameter *elu_parameter_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc index 4ef2ed84d4..fbadd72bca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc @@ -26,9 +26,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookup; namespace mindspore::kernel { int EmbeddingLookupCPUKernel::Init() { - embedding_lookup_parameter_ = reinterpret_cast(op_parameter_); - embedding_lookup_parameter_->thread_num = thread_count_; - if (!InferShapeDone()) { return RET_OK; } @@ -36,24 +33,24 @@ int EmbeddingLookupCPUKernel::Init() { } int EmbeddingLookupCPUKernel::ReSize() { - embedding_lookup_parameter_->ids_size_ = in_tensors_.back()->ElementsNum(); - - embedding_lookup_parameter_->layer_size_ = 1; + param_->ids_size_ = in_tensors_.back()->ElementsNum(); + param_->layer_size_ = 1; auto in_shape = in_tensors_.front()->shape(); for (size_t i = 1; i < in_shape.size(); ++i) { - embedding_lookup_parameter_->layer_size_ *= in_shape[i]; + param_->layer_size_ *= in_shape[i]; } - embedding_lookup_parameter_->layer_num_ = 0; + param_->layer_num_ = 0; for (size_t i = 0; i < in_tensors_.size() - 1; ++i) { - embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0]; + param_->layer_num_ += in_tensors_[i]->shape()[0]; } - return RET_OK; } int EmbeddingLookupCPUKernel::DoExcute(int task_id) { - int error_code = EmbeddingLookup(input_addr_, ids_addr_, output_addr_, embedding_lookup_parameter_, task_id); + auto ids_addr = reinterpret_cast(in_tensors_.back()->MutableData()); + auto output_addr = reinterpret_cast(out_tensors_.front()->MutableData()); + int error_code = EmbeddingLookup(input_addr_, ids_addr, output_addr, param_, task_id); if (error_code != RET_OK) { MS_LOG(ERROR) << "embedding lookup error error_code[" << error_code << "]"; return RET_ERROR; @@ -62,8 +59,8 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) { } int EmbeddingLookupRun(void *cdata, int task_id) { - auto EmbeddingLookupData = reinterpret_cast(cdata); - auto ret = EmbeddingLookupData->DoExcute(task_id); + auto kernel = reinterpret_cast(cdata); + auto ret = kernel->DoExcute(task_id); if (ret != RET_OK) { MS_LOG(ERROR) << "EmbeddingLookupRun error task_id[" << task_id << "] error_code[" << ret << "]"; return RET_ERROR; @@ -73,39 +70,38 @@ int EmbeddingLookupRun(void *cdata, int task_id) { int EmbeddingLookupCPUKernel::Run() { MS_ASSERT(context_->allocator != nullptr); - input_addr_ = reinterpret_cast(context_->allocator->Malloc( - sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); - embedding_lookup_parameter_->is_regulated_ = - reinterpret_cast(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_)); - if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) { + input_addr_ = + reinterpret_cast(context_->allocator->Malloc(sizeof(float) * param_->layer_size_ * param_->layer_num_)); + param_->is_regulated_ = reinterpret_cast(context_->allocator->Malloc(sizeof(bool) * param_->layer_num_)); + if (input_addr_ == nullptr || param_->is_regulated_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; - context_->allocator->Free(input_addr_); - context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); + FreeRunBuff(); return RET_ERROR; } - for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) { - embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0; + for (int i = 0; i < param_->layer_num_; ++i) { + param_->is_regulated_[i] = param_->max_norm_ == 0; } - int dest_loc = 0; for (size_t i = 0; i < in_tensors_.size() - 1; i++) { auto input_t = reinterpret_cast(in_tensors_.at(i)->MutableData()); memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum()); dest_loc += in_tensors_.at(i)->ElementsNum(); } - output_addr_ = reinterpret_cast(out_tensors_.front()->MutableData()); - ids_addr_ = reinterpret_cast(in_tensors_.back()->MutableData()); - - auto ret = - ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); - context_->allocator->Free(input_addr_); - context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); + auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_); + FreeRunBuff(); if (ret != RET_OK) { MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]"; } return ret; } +void EmbeddingLookupCPUKernel::FreeRunBuff() { + context_->allocator->Free(input_addr_); + context_->allocator->Free(param_->is_regulated_); + input_addr_ = nullptr; + param_->is_regulated_ = nullptr; +} + kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *parameter, const lite::InnerContext *ctx, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h index edbfe0af49..ad78806765 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h @@ -27,30 +27,20 @@ class EmbeddingLookupCPUKernel : public LiteKernel { explicit EmbeddingLookupCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {} - ~EmbeddingLookupCPUKernel() override { - if (input_addr_ != nullptr) { - free(input_addr_); - } - if (embedding_lookup_parameter_->is_regulated_ != nullptr) { - free(embedding_lookup_parameter_->is_regulated_); - } - }; + : LiteKernel(parameter, inputs, outputs, ctx, primitive) { + param_ = reinterpret_cast(parameter); + } + ~EmbeddingLookupCPUKernel() = default; int Init() override; int ReSize() override; int Run() override; int DoExcute(int task_id); - protected: - const lite::InnerContext *ctx_ = nullptr; - int thread_count_ = 1; - EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr; - private: + void FreeRunBuff(); + EmbeddingLookupParameter *param_ = nullptr; float *input_addr_ = nullptr; - float *output_addr_ = nullptr; - int *ids_addr_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc index 5a75e98e2b..c1eed749e7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc @@ -44,7 +44,9 @@ void FullconnectionCPUKernel::FreeBuf() { int FullconnectionCPUKernel::ReSize() { FreeBuf(); int row = 1; - for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) row *= (out_tensors_[0]->shape())[i]; + for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) { + row *= (out_tensors_[0]->shape())[i]; + } fc_param_->row_ = row; fc_param_->col_ = out_tensors_[0]->shape().back(); fc_param_->deep_ = (in_tensors_[1]->shape())[1]; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc index 99a586e3c3..60d48f8b30 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc @@ -56,13 +56,12 @@ int InstanceNormCPUKernel::DoInstanceNorm(int task_id) { } int InstanceNormRun(void *cdata, int task_id) { - auto InstanceNormData = reinterpret_cast(cdata); - auto ret = InstanceNormData->DoInstanceNorm(task_id); + auto kernel = reinterpret_cast(cdata); + auto ret = kernel->DoInstanceNorm(task_id); if (ret != RET_OK) { MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]"; - return RET_ERROR; } - return RET_OK; + return ret; } int InstanceNormCPUKernel::Run() { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc index c727970ba3..9492c7951a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc @@ -58,8 +58,8 @@ int LayerNormCPUKernel::DoLayerNorm(int thread_id) { } int LayerNormRun(void *cdata, int task_id) { - auto LayerNormData = reinterpret_cast(cdata); - auto ret = LayerNormData->DoLayerNorm(task_id); + auto kernel = reinterpret_cast(cdata); + auto ret = kernel->DoLayerNorm(task_id); if (ret != RET_OK) { MS_LOG(ERROR) << "LayerNormRun error task_id[" << task_id << "] error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc index c396179670..e3abcb6b27 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc @@ -72,7 +72,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() { return RET_OK; } -int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() { +int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C8NUM != 0) { need_align_ = true; @@ -319,15 +319,10 @@ int ConvDwSWInt8Run(void *cdata, int task_id) { } int ConvolutionDepthwiseSWInt8CPUKernel::Run() { - auto ret = InitBuffer(); + auto ret = InitPackedInputOutput(); if (ret != RET_OK) { MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; - if (need_align_) { - context_->allocator->Free(packed_input_); - context_->allocator->Free(packed_output_); - packed_input_ = nullptr; - packed_output_ = nullptr; - } + FreePackedInputOutput(); return ret; } @@ -353,12 +348,17 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { if (need_align_) { PackNHWC8ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + } + FreePackedInputOutput(); + return ret; +} + +void ConvolutionDepthwiseSWInt8CPUKernel::FreePackedInputOutput() { + if (need_align_) { context_->allocator->Free(packed_input_); context_->allocator->Free(packed_output_); packed_input_ = nullptr; packed_output_ = nullptr; } - return ret; } - } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h index be0b6c95ab..d97dfe8c29 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h @@ -36,10 +36,11 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel { int Run() override; int InitWeightBias(); - int InitBuffer(); + int InitPackedInputOutput(); int Execute(int task_id); private: + void FreePackedInputOutput(); int ReinitQuantParam(); int ReinitFreeBefore(); void FreeTmpQuant();