Merge pull request !5274 from yangruoqi713/litetags/v1.0.0
| @@ -109,15 +109,15 @@ static int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseFp16CPUKernel::Run() { | int ConvolutionDepthwiseFp16CPUKernel::Run() { | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Prepare failed."; | MS_LOG(ERROR) << "Prepare failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -38,20 +38,6 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() { | |||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| } | |||||
| void ConvolutionDepthwiseSWFp16CPUKernel::FreeTmpBuffer() { | |||||
| if (need_align_) { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() { | int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() { | ||||
| @@ -59,14 +45,14 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() { | |||||
| need_align_ = true; | need_align_ = true; | ||||
| int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); | int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); | ||||
| int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8; | int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8; | ||||
| packed_input_ = reinterpret_cast<float16_t *>(malloc(pack_input_size * sizeof(float16_t))); | |||||
| packed_input_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(float16_t))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8; | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8; | ||||
| packed_output_ = reinterpret_cast<float16_t *>(malloc(pack_output_size * sizeof(float16_t))); | |||||
| packed_output_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_output_size * sizeof(float16_t))); | |||||
| if (packed_output_ == nullptr) { | if (packed_output_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -129,18 +115,11 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Init() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() { | int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() { | ||||
| FreeTmpBuffer(); | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); | InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); | ||||
| ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -161,13 +140,19 @@ static int ConvDwSWFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) | |||||
| } | } | ||||
| int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Prepare failed."; | MS_LOG(ERROR) << "Prepare failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -194,6 +179,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| } | } | ||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| @@ -50,7 +50,6 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding_ = nullptr; | SlidingWindowParam *sliding_ = nullptr; | ||||
| float16_t *packed_weight_ = nullptr; | float16_t *packed_weight_ = nullptr; | ||||
| float16_t *packed_input_ = nullptr; | float16_t *packed_input_ = nullptr; | ||||
| @@ -37,20 +37,6 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { | |||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| } | |||||
| void DeconvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() { | |||||
| if (need_align_) { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { | int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { | ||||
| @@ -69,14 +55,14 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { | |||||
| int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() { | int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() { | ||||
| int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); | int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); | ||||
| int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8; | int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8; | ||||
| packed_input_ = reinterpret_cast<float16_t *>(malloc(pack_input_size * sizeof(float16_t))); | |||||
| packed_input_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(float16_t))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8; | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8; | ||||
| packed_output_ = reinterpret_cast<float16_t *>(malloc(pack_output_size * sizeof(float16_t))); | |||||
| packed_output_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_output_size * sizeof(float16_t))); | |||||
| if (packed_output_ == nullptr) { | if (packed_output_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -137,17 +123,11 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { | int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { | ||||
| FreeTmpBuffer(); | |||||
| InitSlideParam(); | InitSlideParam(); | ||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -168,13 +148,18 @@ static int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) | |||||
| } | } | ||||
| int DeconvolutionDepthwiseFp16CPUKernel::Run() { | int DeconvolutionDepthwiseFp16CPUKernel::Run() { | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Prepare failed."; | MS_LOG(ERROR) << "Prepare failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -202,6 +187,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| } | } | ||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| @@ -52,7 +52,6 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding_ = nullptr; | SlidingWindowParam *sliding_ = nullptr; | ||||
| float16_t *packed_weight_ = nullptr; | float16_t *packed_weight_ = nullptr; | ||||
| float16_t *packed_input_ = nullptr; | float16_t *packed_input_ = nullptr; | ||||
| @@ -100,15 +100,16 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseCPUKernel::Run() { | int ConvolutionDepthwiseCPUKernel::Run() { | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Prepare failed."; | MS_LOG(ERROR) << "Prepare failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto input_tensor = in_tensors_.at(kInputIndex); | auto input_tensor = in_tensors_.at(kInputIndex); | ||||
| input_ptr_ = reinterpret_cast<float *>(input_tensor->Data()); | input_ptr_ = reinterpret_cast<float *>(input_tensor->Data()); | ||||
| @@ -36,20 +36,6 @@ ConvolutionDepthwiseSWCPUKernel::~ConvolutionDepthwiseSWCPUKernel() { | |||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| } | |||||
| void ConvolutionDepthwiseSWCPUKernel::FreeTmpBuffer() { | |||||
| if (need_align_) { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() { | int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() { | ||||
| @@ -89,7 +75,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitBuffer() { | |||||
| need_align_ = true; | need_align_ = true; | ||||
| int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); | int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); | ||||
| int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4; | int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4; | ||||
| packed_input_ = reinterpret_cast<float *>(malloc(pack_input_size * sizeof(float))); | |||||
| packed_input_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_input_size * sizeof(float))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -97,7 +83,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitBuffer() { | |||||
| int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM); | int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM); | ||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4; | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4; | ||||
| packed_output_ = reinterpret_cast<float *>(malloc(pack_output_size * sizeof(float))); | |||||
| packed_output_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_output_size * sizeof(float))); | |||||
| if (packed_output_ == nullptr) { | if (packed_output_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -125,16 +111,9 @@ int ConvolutionDepthwiseSWCPUKernel::Init() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseSWCPUKernel::ReSize() { | int ConvolutionDepthwiseSWCPUKernel::ReSize() { | ||||
| FreeTmpBuffer(); | |||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| InitSlidingParamConvDw(sliding_, conv_param_, C4NUM); | InitSlidingParamConvDw(sliding_, conv_param_, C4NUM); | ||||
| conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); | conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); | ||||
| auto ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -155,13 +134,20 @@ int ConvDwSWRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseSWCPUKernel::Run() { | int ConvolutionDepthwiseSWCPUKernel::Run() { | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Prepare failed."; | MS_LOG(ERROR) << "Prepare failed."; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto input_tensor = in_tensors_.at(kInputIndex); | auto input_tensor = in_tensors_.at(kInputIndex); | ||||
| @@ -190,7 +176,10 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_, | PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -40,7 +40,6 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding_ = nullptr; | SlidingWindowParam *sliding_ = nullptr; | ||||
| float *packed_weight_ = nullptr; | float *packed_weight_ = nullptr; | ||||
| float *packed_input_ = nullptr; | float *packed_input_ = nullptr; | ||||
| @@ -36,20 +36,6 @@ DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { | |||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| } | |||||
| void DeconvolutionDepthwiseCPUKernel::FreeTmpBuffer() { | |||||
| if (need_align_) { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { | int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { | ||||
| @@ -100,7 +86,7 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() { | |||||
| need_align_ = true; | need_align_ = true; | ||||
| int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); | int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); | ||||
| int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4; | int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4; | ||||
| packed_input_ = reinterpret_cast<float *>(malloc(pack_input_size * sizeof(float))); | |||||
| packed_input_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_input_size * sizeof(float))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -108,7 +94,7 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() { | |||||
| int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM); | int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM); | ||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4; | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4; | ||||
| packed_output_ = reinterpret_cast<float *>(malloc(pack_output_size * sizeof(float))); | |||||
| packed_output_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_output_size * sizeof(float))); | |||||
| if (packed_output_ == nullptr) { | if (packed_output_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -137,15 +123,8 @@ int DeconvolutionDepthwiseCPUKernel::Init() { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseCPUKernel::ReSize() { | int DeconvolutionDepthwiseCPUKernel::ReSize() { | ||||
| FreeTmpBuffer(); | |||||
| InitSlideParam(); | InitSlideParam(); | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| auto ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -166,15 +145,23 @@ int DeconvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseCPUKernel::Run() { | int DeconvolutionDepthwiseCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | |||||
| if (prepare_ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||||
| return prepare_ret; | |||||
| } | |||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | if (conv_param_->input_channel_ != conv_param_->output_channel_) { | ||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | MS_LOG(ERROR) << "Only support input channel equals output channel."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | |||||
| return ret; | |||||
| } | |||||
| ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; | |||||
| return ret; | |||||
| } | |||||
| auto input_tensor = in_tensors_.at(kInputIndex); | auto input_tensor = in_tensors_.at(kInputIndex); | ||||
| auto input_addr = reinterpret_cast<float *>(input_tensor->Data()); | auto input_addr = reinterpret_cast<float *>(input_tensor->Data()); | ||||
| @@ -191,7 +178,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() { | |||||
| packed_output_ = output_addr; | packed_output_ = output_addr; | ||||
| } | } | ||||
| auto ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_); | |||||
| ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -200,6 +187,8 @@ int DeconvolutionDepthwiseCPUKernel::Run() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_, | PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -41,7 +41,6 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding_ = nullptr; | SlidingWindowParam *sliding_ = nullptr; | ||||
| float *packed_weight_ = nullptr; | float *packed_weight_ = nullptr; | ||||
| float *packed_input_ = nullptr; | float *packed_input_ = nullptr; | ||||
| @@ -28,19 +28,6 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | using mindspore::schema::PrimitiveType_DepthwiseConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| void ConvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { | |||||
| if (packed_input_ != nullptr) { | |||||
| free(packed_input_); | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (need_align_) { | |||||
| if (packed_output_ != nullptr) { | |||||
| free(packed_output_); | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| } | |||||
| ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | ||||
| if (sliding != nullptr) { | if (sliding != nullptr) { | ||||
| delete sliding; | delete sliding; | ||||
| @@ -50,7 +37,6 @@ ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | |||||
| free(packed_weight_); | free(packed_weight_); | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| FreeQuantParam(); | FreeQuantParam(); | ||||
| } | } | ||||
| @@ -88,7 +74,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||||
| int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { | int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { | ||||
| int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * | int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * | ||||
| UP_DIV(conv_param_->input_channel_, 4); | UP_DIV(conv_param_->input_channel_, 4); | ||||
| packed_input_ = reinterpret_cast<int16_t *>(malloc(pack_input_size * sizeof(int16_t))); | |||||
| packed_input_ = reinterpret_cast<int16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(int16_t))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -98,7 +84,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||||
| need_align_ = true; | need_align_ = true; | ||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * | ||||
| UP_DIV(conv_param_->output_channel_, C4NUM); | UP_DIV(conv_param_->output_channel_, C4NUM); | ||||
| packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t))); | |||||
| packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -120,7 +106,6 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | ||||
| FreeTmpBuffer(); | |||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| InitSlidingParamConvDw(sliding, conv_param_, C4NUM); | InitSlidingParamConvDw(sliding, conv_param_, C4NUM); | ||||
| @@ -134,11 +119,6 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -159,14 +139,20 @@ int ConvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||||
| } | } | ||||
| int ConvolutionDepthwiseInt8CPUKernel::Run() { | int ConvolutionDepthwiseInt8CPUKernel::Run() { | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Prepare failed."; | MS_LOG(ERROR) << "Prepare failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (conv_param_->input_channel_ != conv_param_->output_channel_) { | |||||
| MS_LOG(ERROR) << "Only support input channel equals output channel."; | |||||
| return RET_ERROR; | |||||
| ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | |||||
| return ret; | |||||
| } | } | ||||
| auto input_tensor = in_tensors_.at(kInputIndex); | auto input_tensor = in_tensors_.at(kInputIndex); | ||||
| @@ -187,7 +173,9 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_output_); | |||||
| } | } | ||||
| context_->allocator->Free(packed_input_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -40,7 +40,6 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding = nullptr; | SlidingWindowParam *sliding = nullptr; | ||||
| int16_t *packed_weight_ = nullptr; | int16_t *packed_weight_ = nullptr; | ||||
| int16_t *packed_input_ = nullptr; | int16_t *packed_input_ = nullptr; | ||||
| @@ -37,27 +37,9 @@ DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { | |||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| FreeQuantParam(); | FreeQuantParam(); | ||||
| } | } | ||||
| void DeconvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (need_align_) { | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| if (output_buffer_ != nullptr) { | |||||
| delete output_buffer_; | |||||
| output_buffer_ = nullptr; | |||||
| } | |||||
| } | |||||
| int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | ||||
| // init weight: int8 -> int16 | // init weight: int8 -> int16 | ||||
| // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | ||||
| @@ -111,7 +93,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitSlideParam() { | |||||
| int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | ||||
| int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * | int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * | ||||
| UP_DIV(conv_param_->input_channel_, 4); | UP_DIV(conv_param_->input_channel_, 4); | ||||
| packed_input_ = reinterpret_cast<int16_t *>(malloc(pack_input_size * sizeof(int16_t))); | |||||
| packed_input_ = reinterpret_cast<int16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(int16_t))); | |||||
| if (packed_input_ == nullptr) { | if (packed_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -121,7 +103,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||||
| need_align_ = true; | need_align_ = true; | ||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * | ||||
| UP_DIV(conv_param_->output_channel_, C4NUM); | UP_DIV(conv_param_->output_channel_, C4NUM); | ||||
| packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t))); | |||||
| packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); | |||||
| if (packed_output_ == nullptr) { | if (packed_output_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -129,8 +111,8 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||||
| memset(packed_output_, 0, pack_output_size * sizeof(int8_t)); | memset(packed_output_, 0, pack_output_size * sizeof(int8_t)); | ||||
| } | } | ||||
| output_buffer_ = | |||||
| reinterpret_cast<int32_t *>(malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t))); | |||||
| output_buffer_ = reinterpret_cast<int32_t *>( | |||||
| context_->allocator->Malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t))); | |||||
| if (output_buffer_ == nullptr) { | if (output_buffer_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -165,7 +147,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | ||||
| FreeTmpBuffer(); | |||||
| InitSlideParam(); | InitSlideParam(); | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| @@ -224,7 +205,10 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_output_); | |||||
| } | } | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(output_buffer_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -41,7 +41,6 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding = nullptr; | SlidingWindowParam *sliding = nullptr; | ||||
| int16_t *packed_weight_ = nullptr; | int16_t *packed_weight_ = nullptr; | ||||
| int16_t *packed_input_ = nullptr; | int16_t *packed_input_ = nullptr; | ||||