From: @sunsuodong
Reviewed-by: @ddwsky, @zhanghaibo5
Signed-off-by: @zhanghaibo5
Tag: v1.1.0
@@ -23,7 +23,7 @@ void Calculate_Data(const float *input_data, float *output_data, int num, EluPar
 }
 int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->in_size_; i += parameter->thread_num_) {
+  for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) {
     Calculate_Data(input_data, output_data, i, parameter);
   }
   return NNACL_OK;

@@ -22,7 +22,6 @@
 typedef struct EluParameter {
   OpParameter op_parameter_;
   float alpha_;
-  int thread_num_;
   int in_size_;
 } EluParameter;

@@ -47,7 +47,7 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding
 }
 int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->thread_num) {
+  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
     int ret = CopyData(input_data, ids, output_data, i, parameter);
     if (ret != NNACL_OK) {
       return ret;

@@ -26,7 +26,6 @@ typedef struct EmbeddingLookupParameter {
   int ids_size_;
   int layer_size_;
   int layer_num_;
-  int thread_num;
 } EmbeddingLookupParameter;
 #ifdef __cplusplus
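The two hunks above delete the per-op thread-count fields because every NNACL parameter struct embeds an `OpParameter` as its first member, so the worker stride can come from that common header instead of a duplicated (and, in `EmbeddingLookupParameter`, inconsistently named) field. A minimal C++ sketch of the embedding pattern, with the structs reduced to the fields used here:

```cpp
#include <cstdio>

// Simplified stand-ins for the real NNACL structs; only the fields used
// here are kept.
typedef struct OpParameter {
  int thread_num_;  // set once by the framework for every op
} OpParameter;

typedef struct EluParameter {
  OpParameter op_parameter_;  // common header; stays the first member
  float alpha_;
  int in_size_;
} EluParameter;

int main() {
  EluParameter param = {{4}, 1.0f, 1024};
  // Each worker strides by the shared thread count through the embedded
  // header, exactly as the rewritten loops above do:
  for (int i = 0; i < param.in_size_; i += param.op_parameter_.thread_num_) {
    if (i >= 8) break;  // keep the demo output short
    std::printf("task slot %d\n", i);
  }
  return 0;
}
```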
@@ -40,7 +40,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
   }
 }
-int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);

@@ -142,19 +142,17 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return ret;
   }
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }

@@ -173,11 +171,19 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-  return RET_OK;
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWFp16CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
 }
 }  // namespace mindspore::kernel
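The new `FreePackedInputOutput()` centralizes a cleanup sequence that was previously copy-pasted on three exit paths, and it nulls the pointers after freeing so that calling it again on a later path cannot double-free. A minimal sketch of the free-and-null idiom, using a hypothetical `Allocator` in place of `context_->allocator`:

```cpp
#include <cstdlib>

// Hypothetical allocator; the real kernels call context_->allocator.
struct Allocator {
  void *Malloc(std::size_t size) { return malloc(size); }
  void Free(void *ptr) { free(ptr); }  // free(nullptr) is a no-op
};

struct PackedBuffersSketch {
  Allocator *alloc = nullptr;
  float *packed_input_ = nullptr;
  float *packed_output_ = nullptr;
  bool need_align_ = false;

  // Safe on every exit path: pointers are nulled after freeing, so a
  // second call (error path followed by normal teardown) frees nothing twice.
  void FreePackedInputOutput() {
    if (need_align_) {
      alloc->Free(packed_input_);
      alloc->Free(packed_output_);
      packed_input_ = nullptr;
      packed_output_ = nullptr;
    }
  }
};

int main() {
  Allocator alloc;
  PackedBuffersSketch bufs{&alloc, nullptr, nullptr, true};
  bufs.packed_input_ = static_cast<float *>(alloc.Malloc(64 * sizeof(float)));
  bufs.FreePackedInputOutput();
  bufs.FreePackedInputOutput();  // harmless repeat
  return 0;
}
```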
@@ -45,11 +45,12 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;

@@ -53,7 +53,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
   return RET_OK;
 }
-int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);

@@ -156,19 +156,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }

@@ -191,14 +189,22 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
+  FreePackedInputOutput();
   return ret;
 }
+void DeconvolutionDepthwiseFp16CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
+}
 kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,

@@ -46,12 +46,13 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int InitSlideParam();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;

@@ -183,7 +183,7 @@ int DeConvolutionFp16CPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]";
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeRunBuf();
     return RET_ERROR;

@@ -197,7 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
     if (error_code != RET_OK) {
-      MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
     }
   }

@@ -70,7 +70,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int ConvolutionDepthwiseSWCPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);

@@ -134,9 +134,10 @@ int ConvDwSWRun(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWCPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   auto input_tensor = in_tensors_.at(kInputIndex);

@@ -159,16 +160,22 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
   }
-  return RET_OK;
 }
 }  // namespace mindspore::kernel
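Note the control-flow change in this fp32 `Run()`: a failed `ParallelLaunch` now logs and falls through instead of returning early, so the packed buffers are always released and the launch result is propagated after cleanup. A self-contained sketch of the log, fall-through, clean-up, propagate shape, with hypothetical stand-ins for the launch and the cleanup:

```cpp
#include <cstdio>

// Hypothetical stand-ins for ParallelLaunch and the packed-buffer cleanup.
static int LaunchWorkers() { return -1; }  // pretend the launch failed
static void Cleanup() { std::puts("cleanup ran"); }

static int Run() {
  int ret = LaunchWorkers();
  if (ret != 0) {
    std::fprintf(stderr, "run error: error_code[%d]\n", ret);
    // No early return: the cleanup below must still execute.
  }
  Cleanup();   // runs on success and on failure alike
  return ret;  // propagate the real error code, not a constant RET_OK
}

int main() { return Run() != 0; }
```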
@@ -35,11 +35,12 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;

@@ -146,21 +146,20 @@ int ConvolutionCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 ConvParameter *CreateNewConvParameter(ConvParameter *parameter) {
-  auto conv_parameter = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
+  auto conv_parameter = new (std::nothrow) ConvParameter;
   if (conv_parameter == nullptr) {
     MS_LOG(ERROR) << "Malloc new conv parameter failed.";
     return nullptr;
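Switching `CreateNewConvParameter` from `malloc` to `new (std::nothrow)` keeps the existing null check (no exception is thrown on failure) while letting C++ construct the object, so any members with initializers start in a defined state instead of holding garbage until copied over. A small sketch contrasting the two, with a hypothetical `ConvParams` struct:

```cpp
#include <cstdlib>
#include <new>

// Hypothetical struct; the real ConvParameter has many more fields.
struct ConvParams {
  int thread_num_ = 1;  // default member initializer
};

int main() {
  // malloc returns raw storage: reading thread_num_ here before writing
  // it would be undefined behavior.
  auto *raw = static_cast<ConvParams *>(malloc(sizeof(ConvParams)));
  free(raw);

  // new (std::nothrow) runs the constructor and yields nullptr on failure
  // instead of throwing, so the surrounding null check keeps working.
  auto *obj = new (std::nothrow) ConvParams;
  if (obj == nullptr) return 1;
  int n = obj->thread_num_;  // well-defined: 1
  delete obj;
  return n == 1 ? 0 : 1;
}
```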
@@ -222,17 +222,16 @@ int ConvolutionWinogradCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 }  // namespace mindspore::kernel

@@ -82,7 +82,7 @@ int DeconvolutionDepthwiseCPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int DeconvolutionDepthwiseCPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);

@@ -151,9 +151,10 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
+    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitPackedInputOutput failed.ret: " << ret;
+    FreePackedInputOutput();
     return ret;
   }

@@ -176,16 +177,23 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
   }
-  return RET_OK;
 }
 kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,

@@ -36,11 +36,12 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;

@@ -202,6 +202,7 @@ int DeConvolutionCPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    FreeRunBuf();
     return error_code;
   }

@@ -218,6 +219,7 @@ int DeConvolutionCPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      FreeRunBuf();
       return error_code;
     }
   }

@@ -390,6 +390,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
   auto ret = InitRunBuf();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InitRunBuf fail!ret: " << ret;
+    FreeRunBuf();
     return ret;
   }

@@ -410,5 +411,4 @@ int DeConvolutionWinogradCPUKernel::Run() {
   FreeRunBuf();
   return RET_OK;
 }
 }  // namespace mindspore::kernel
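The three `FreeRunBuf()` insertions above cover the early-return paths: if `InitRunBuf` fails after some of its allocations succeeded, those buffers would otherwise leak. A sketch of why a partial-allocation failure needs the same cleanup as success, with hypothetical buffer names:

```cpp
#include <cstdlib>

// Hypothetical two-buffer run state; the real kernels allocate more.
static float *tmp_a = nullptr;
static float *tmp_b = nullptr;

static void FreeRunBuf() {
  free(tmp_a);  // free(nullptr) is harmless, so partial states are fine
  free(tmp_b);
  tmp_a = nullptr;
  tmp_b = nullptr;
}

static int InitRunBuf() {
  tmp_a = static_cast<float *>(malloc(1024 * sizeof(float)));
  if (tmp_a == nullptr) return -1;
  tmp_b = static_cast<float *>(malloc(1024 * sizeof(float)));
  if (tmp_b == nullptr) return -1;  // tmp_a stays allocated here
  return 0;
}

int main() {
  if (InitRunBuf() != 0) {
    FreeRunBuf();  // reclaims tmp_a even though tmp_b failed
    return 1;
  }
  FreeRunBuf();
  return 0;
}
```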
@@ -26,13 +26,9 @@ using mindspore::schema::PrimitiveType_Elu;
 namespace mindspore::kernel {
 int EluCPUKernel::Init() {
-  elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
-  elu_parameter_->thread_num_ = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
   return ReSize();
 }

@@ -42,6 +38,8 @@ int EluCPUKernel::ReSize() {
 }
 int EluCPUKernel::DoExcute(int task_id) {
+  auto input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   Elu(input_addr, output_addr, elu_parameter_, task_id);
   return RET_OK;
 }

@@ -57,10 +55,7 @@ int EluRun(void *cdata, int task_id) {
 }
 int EluCPUKernel::Run() {
-  input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
-  output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, elu_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
     return RET_ERROR;

@@ -72,16 +67,6 @@ kernel::LiteKernel *CpuEluFp32KernelCreator(const std::vector<lite::Tensor *> &i
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                             const lite::InnerContext *ctx, const KernelKey &desc,
                                             const mindspore::lite::PrimitiveC *primitive) {
-  if (parameter == nullptr) {
-    MS_LOG(ERROR) << "parameter is nullptr";
-    return nullptr;
-  }
-  if (ctx == nullptr) {
-    MS_LOG(ERROR) << "ctx is nullptr";
-    free(parameter);
-    return nullptr;
-  }
-  MS_ASSERT(desc.type == PrimitiveType_Elu);
   auto *kernel = new (std::nothrow) EluCPUKernel(parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "Create Kernel failed, name: " << parameter->name_;

@@ -24,25 +24,21 @@
 namespace mindspore::kernel {
 class EluCPUKernel : public LiteKernel {
  public:
-  explicit EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                        const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EluCPUKernel() override{};
+  EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+               const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+               const mindspore::lite::PrimitiveC *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
+  }
+  ~EluCPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EluParameter *elu_parameter_ = nullptr;
  private:
-  float *input_addr = nullptr;
-  float *output_addr = nullptr;
+  EluParameter *elu_parameter_ = nullptr;
 };
 }  // namespace mindspore::kernel
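The Elu rewrite removes the per-run member pointers: `DoExcute` now fetches the tensor addresses itself, so `Run()` no longer has to populate kernel state before launching workers and the object carries no stale pointers between runs. A minimal sketch of the resulting shape, with hypothetical `Tensor` and kernel types:

```cpp
#include <vector>

// Hypothetical tensor stand-in with the one accessor used here.
struct Tensor {
  std::vector<float> data;
  float *MutableData() { return data.data(); }
};

struct EluKernelSketch {
  std::vector<Tensor *> in_tensors_;
  std::vector<Tensor *> out_tensors_;

  // Addresses are fetched inside the per-task body, so there is no member
  // pointer that Run() must populate before launching the workers.
  int DoExcute(int task_id) {
    auto *input_addr = in_tensors_.front()->MutableData();
    auto *output_addr = out_tensors_.front()->MutableData();
    output_addr[task_id] = input_addr[task_id];  // placeholder for Elu()
    return 0;
  }
};

int main() {
  Tensor in{{1.0f, 2.0f}};
  Tensor out{{0.0f, 0.0f}};
  EluKernelSketch kernel{{&in}, {&out}};
  return kernel.DoExcute(0);
}
```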
@@ -26,9 +26,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookup;
 namespace mindspore::kernel {
 int EmbeddingLookupCPUKernel::Init() {
-  embedding_lookup_parameter_ = reinterpret_cast<EmbeddingLookupParameter *>(op_parameter_);
-  embedding_lookup_parameter_->thread_num = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }

@@ -36,24 +33,24 @@ int EmbeddingLookupCPUKernel::Init() {
 }
 int EmbeddingLookupCPUKernel::ReSize() {
-  embedding_lookup_parameter_->ids_size_ = in_tensors_.back()->ElementsNum();
-  embedding_lookup_parameter_->layer_size_ = 1;
+  param_->ids_size_ = in_tensors_.back()->ElementsNum();
+  param_->layer_size_ = 1;
   auto in_shape = in_tensors_.front()->shape();
   for (size_t i = 1; i < in_shape.size(); ++i) {
-    embedding_lookup_parameter_->layer_size_ *= in_shape[i];
+    param_->layer_size_ *= in_shape[i];
   }
-  embedding_lookup_parameter_->layer_num_ = 0;
+  param_->layer_num_ = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
-    embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
+    param_->layer_num_ += in_tensors_[i]->shape()[0];
   }
   return RET_OK;
 }
 int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
-  int error_code = EmbeddingLookup(input_addr_, ids_addr_, output_addr_, embedding_lookup_parameter_, task_id);
+  auto ids_addr = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
+  int error_code = EmbeddingLookup(input_addr_, ids_addr, output_addr, param_, task_id);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "embedding lookup error error_code[" << error_code << "]";
     return RET_ERROR;

@@ -62,8 +59,8 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
 }
 int EmbeddingLookupRun(void *cdata, int task_id) {
-  auto EmbeddingLookupData = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
-  auto ret = EmbeddingLookupData->DoExcute(task_id);
+  auto kernel = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
+  auto ret = kernel->DoExcute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookupRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;

@@ -73,39 +70,38 @@ int EmbeddingLookupRun(void *cdata, int task_id) {
 int EmbeddingLookupCPUKernel::Run() {
   MS_ASSERT(context_->allocator != nullptr);
-  input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
-      sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
-  embedding_lookup_parameter_->is_regulated_ =
-      reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
-  if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) {
+  input_addr_ =
+      reinterpret_cast<float *>(context_->allocator->Malloc(sizeof(float) * param_->layer_size_ * param_->layer_num_));
+  param_->is_regulated_ = reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * param_->layer_num_));
+  if (input_addr_ == nullptr || param_->is_regulated_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
-    context_->allocator->Free(input_addr_);
-    context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+    FreeRunBuff();
     return RET_ERROR;
   }
-  for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
-    embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
+  for (int i = 0; i < param_->layer_num_; ++i) {
+    param_->is_regulated_[i] = param_->max_norm_ == 0;
   }
   int dest_loc = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
     auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
-  auto ret =
-      ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num);
-  context_->allocator->Free(input_addr_);
-  context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  FreeRunBuff();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
   }
   return ret;
 }
+void EmbeddingLookupCPUKernel::FreeRunBuff() {
+  context_->allocator->Free(input_addr_);
+  context_->allocator->Free(param_->is_regulated_);
+  input_addr_ = nullptr;
+  param_->is_regulated_ = nullptr;
+}
 kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                         const std::vector<lite::Tensor *> &outputs,
                                                         OpParameter *parameter, const lite::InnerContext *ctx,
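With this change the scratch table (`input_addr_`) and the `is_regulated_` flags live only for the duration of one `Run()`: both come from the context allocator at the top and are returned through `FreeRunBuff()` on every exit, replacing the old destructor that released allocator-owned memory with plain `free()`. A compact sketch of the per-run scratch pattern, with a hypothetical allocator type:

```cpp
#include <cstdlib>

// Hypothetical allocator matching the Malloc/Free pair the kernels use.
struct Allocator {
  void *Malloc(std::size_t size) { return malloc(size); }
  void Free(void *ptr) { free(ptr); }
};

struct EmbeddingLookupSketch {
  Allocator *alloc = nullptr;
  float *input_addr_ = nullptr;
  bool *is_regulated_ = nullptr;

  void FreeRunBuff() {
    alloc->Free(input_addr_);
    alloc->Free(is_regulated_);
    input_addr_ = nullptr;
    is_regulated_ = nullptr;
  }

  int Run(int layer_num, int layer_size) {
    input_addr_ = static_cast<float *>(alloc->Malloc(sizeof(float) * layer_size * layer_num));
    is_regulated_ = static_cast<bool *>(alloc->Malloc(sizeof(bool) * layer_num));
    if (input_addr_ == nullptr || is_regulated_ == nullptr) {
      FreeRunBuff();  // frees whichever allocation did succeed
      return -1;
    }
    // ... gather the embedding tables and launch the lookup here ...
    FreeRunBuff();  // scratch never outlives a single Run()
    return 0;
  }
};

int main() {
  Allocator alloc;
  EmbeddingLookupSketch kernel{&alloc};
  return kernel.Run(8, 16);
}
```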
@@ -27,30 +27,20 @@ class EmbeddingLookupCPUKernel : public LiteKernel {
   explicit EmbeddingLookupCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                     const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EmbeddingLookupCPUKernel() override {
-    if (input_addr_ != nullptr) {
-      free(input_addr_);
-    }
-    if (embedding_lookup_parameter_->is_regulated_ != nullptr) {
-      free(embedding_lookup_parameter_->is_regulated_);
-    }
-  };
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    param_ = reinterpret_cast<EmbeddingLookupParameter *>(parameter);
+  }
+  ~EmbeddingLookupCPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr;
  private:
+  void FreeRunBuff();
+  EmbeddingLookupParameter *param_ = nullptr;
   float *input_addr_ = nullptr;
-  float *output_addr_ = nullptr;
-  int *ids_addr_ = nullptr;
 };
 }  // namespace mindspore::kernel

@@ -44,7 +44,9 @@ void FullconnectionCPUKernel::FreeBuf() {
 int FullconnectionCPUKernel::ReSize() {
   FreeBuf();
   int row = 1;
-  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) row *= (out_tensors_[0]->shape())[i];
+  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) {
+    row *= (out_tensors_[0]->shape())[i];
+  }
   fc_param_->row_ = row;
   fc_param_->col_ = out_tensors_[0]->shape().back();
   fc_param_->deep_ = (in_tensors_[1]->shape())[1];

@@ -56,13 +56,12 @@ int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
 }
 int InstanceNormRun(void *cdata, int task_id) {
-  auto InstanceNormData = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
-  auto ret = InstanceNormData->DoInstanceNorm(task_id);
+  auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
+  auto ret = kernel->DoInstanceNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
-    return RET_ERROR;
   }
-  return RET_OK;
+  return ret;
 }
 int InstanceNormCPUKernel::Run() {

@@ -58,8 +58,8 @@ int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
 }
 int LayerNormRun(void *cdata, int task_id) {
-  auto LayerNormData = reinterpret_cast<LayerNormCPUKernel *>(cdata);
-  auto ret = LayerNormData->DoLayerNorm(task_id);
+  auto kernel = reinterpret_cast<LayerNormCPUKernel *>(cdata);
+  auto ret = kernel->DoLayerNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LayerNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
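`InstanceNormRun` and `LayerNormRun` are the C-style trampolines that `ParallelLaunch` expects: the opaque `void *` is cast back to the kernel and the per-task method is invoked, and after this change `InstanceNormRun` forwards the method's return code instead of flattening it to `RET_ERROR`/`RET_OK`. A self-contained sketch of the trampoline shape (all names hypothetical):

```cpp
#include <cstdio>

// Hypothetical per-task worker with the same shape as DoLayerNorm.
struct KernelSketch {
  int DoWork(int task_id) {
    std::printf("task %d done\n", task_id);
    return 0;  // 0 plays the role of RET_OK
  }
};

// C-style trampoline: the thread pool hands back an opaque pointer, the
// callback casts it to the kernel and forwards the real return code.
static int WorkerRun(void *cdata, int task_id) {
  auto *kernel = reinterpret_cast<KernelSketch *>(cdata);
  auto ret = kernel->DoWork(task_id);
  if (ret != 0) {
    std::fprintf(stderr, "WorkerRun error task_id[%d] error_code[%d]\n", task_id, ret);
  }
  return ret;  // forward unchanged rather than flattening to a constant
}

int main() {
  KernelSketch kernel;
  return WorkerRun(&kernel, 0);
}
```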
@@ -72,7 +72,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;

@@ -319,15 +319,10 @@ int ConvDwSWInt8Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
-    if (need_align_) {
-      context_->allocator->Free(packed_input_);
-      context_->allocator->Free(packed_output_);
-      packed_input_ = nullptr;
-      packed_output_ = nullptr;
-    }
+    FreePackedInputOutput();
     return ret;
   }

@@ -353,12 +348,17 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWInt8CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
     packed_input_ = nullptr;
     packed_output_ = nullptr;
   }
-  return ret;
 }
 }  // namespace mindspore::kernel

@@ -36,10 +36,11 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
   int InitWeightBias();
-  int InitBuffer();
+  int InitPackedInputOutput();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   int ReinitQuantParam();
   int ReinitFreeBefore();
   void FreeTmpQuant();