diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 1f66d0bd15..96f2b3d9b5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -109,15 +109,15 @@ static int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { } int ConvolutionDepthwiseFp16CPUKernel::Run() { + if (conv_param_->input_channel_ != conv_param_->output_channel_) { + MS_LOG(ERROR) << "Only support input channel equals output channel."; + return RET_ERROR; + } auto ret = Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - if (conv_param_->input_channel_ != conv_param_->output_channel_) { - MS_LOG(ERROR) << "Only support input channel equals output channel."; - return RET_ERROR; - } ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index 39f2eb7bf2..a7ebff8c6e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -38,20 +38,6 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() { delete packed_weight_; packed_weight_ = nullptr; } - FreeTmpBuffer(); -} - -void ConvolutionDepthwiseSWFp16CPUKernel::FreeTmpBuffer() { - if (need_align_) { - if (packed_input_ != nullptr) { - delete packed_input_; - packed_input_ = nullptr; - } - if (packed_output_ != nullptr) { - delete packed_output_; - packed_output_ = nullptr; - } - } } int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() { @@ -59,14 +45,14 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() { need_align_ = true; int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8; - packed_input_ = reinterpret_cast(malloc(pack_input_size * sizeof(float16_t))); + packed_input_ = reinterpret_cast(context_->allocator->Malloc(pack_input_size * sizeof(float16_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8; - packed_output_ = reinterpret_cast(malloc(pack_output_size * sizeof(float16_t))); + packed_output_ = reinterpret_cast(context_->allocator->Malloc(pack_output_size * sizeof(float16_t))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -129,18 +115,11 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Init() { } int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() { - FreeTmpBuffer(); auto ret = ConvolutionBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; } InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); - - ret = InitBuffer(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; - return RET_ERROR; - } return RET_OK; } @@ -161,13 +140,19 @@ static int ConvDwSWFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) } int ConvolutionDepthwiseSWFp16CPUKernel::Run() { + if (conv_param_->input_channel_ != conv_param_->output_channel_) { + MS_LOG(ERROR) << "Only support input channel equals output channel."; + return RET_ERROR; + } + auto ret = Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - if (conv_param_->input_channel_ != conv_param_->output_channel_) { - MS_LOG(ERROR) << "Only support input channel equals output channel."; + ret = InitBuffer(); + if (ret != 0) { + MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; return RET_ERROR; } @@ -194,6 +179,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { if (need_align_) { PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + context_->allocator->Free(packed_input_); + context_->allocator->Free(packed_output_); } ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h index dce8aeb468..582fde8eec 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h @@ -50,7 +50,6 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel int Execute(int task_id); private: - void FreeTmpBuffer(); SlidingWindowParam *sliding_ = nullptr; float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 3a8ca200f2..25656b1b4f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -37,20 +37,6 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { delete packed_weight_; packed_weight_ = nullptr; } - FreeTmpBuffer(); -} - -void DeconvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() { - if (need_align_) { - if (packed_input_ != nullptr) { - delete packed_input_; - packed_input_ = nullptr; - } - if (packed_output_ != nullptr) { - delete packed_output_; - packed_output_ = nullptr; - } - } } int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { @@ -69,14 +55,14 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() { int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8; - packed_input_ = reinterpret_cast(malloc(pack_input_size * sizeof(float16_t))); + packed_input_ = reinterpret_cast(context_->allocator->Malloc(pack_input_size * sizeof(float16_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8; - packed_output_ = reinterpret_cast(malloc(pack_output_size * sizeof(float16_t))); + packed_output_ = reinterpret_cast(context_->allocator->Malloc(pack_output_size * sizeof(float16_t))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -137,17 +123,11 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() { } int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { - FreeTmpBuffer(); InitSlideParam(); auto ret = ConvolutionBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; } - ret = InitBuffer(); - if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; - return RET_ERROR; - } return RET_OK; } @@ -168,13 +148,18 @@ static int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) } int DeconvolutionDepthwiseFp16CPUKernel::Run() { + if (conv_param_->input_channel_ != conv_param_->output_channel_) { + MS_LOG(ERROR) << "Only support input channel equals output channel."; + return RET_ERROR; + } auto ret = Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - if (conv_param_->input_channel_ != conv_param_->output_channel_) { - MS_LOG(ERROR) << "Only support input channel equals output channel."; + ret = InitBuffer(); + if (ret != 0) { + MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; return RET_ERROR; } @@ -202,6 +187,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { if (need_align_) { PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + context_->allocator->Free(packed_input_); + context_->allocator->Free(packed_output_); } ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index 984c19731e..539d129664 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -52,7 +52,6 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel int Execute(int task_id); private: - void FreeTmpBuffer(); SlidingWindowParam *sliding_ = nullptr; float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc index 728c10523a..fe11e5fc1b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc @@ -100,15 +100,16 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { } int ConvolutionDepthwiseCPUKernel::Run() { + if (conv_param_->input_channel_ != conv_param_->output_channel_) { + MS_LOG(ERROR) << "Only support input channel equals output channel."; + return RET_ERROR; + } auto ret = Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "Prepare failed."; return ret; } - if (conv_param_->input_channel_ != conv_param_->output_channel_) { - MS_LOG(ERROR) << "Only support input channel equals output channel."; - return RET_ERROR; - } + auto input_tensor = in_tensors_.at(kInputIndex); input_ptr_ = reinterpret_cast(input_tensor->Data()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc index 99aa130087..c21e5c53f2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc @@ -36,20 +36,6 @@ ConvolutionDepthwiseSWCPUKernel::~ConvolutionDepthwiseSWCPUKernel() { delete packed_weight_; packed_weight_ = nullptr; } - FreeTmpBuffer(); -} - -void ConvolutionDepthwiseSWCPUKernel::FreeTmpBuffer() { - if (need_align_) { - if (packed_input_ != nullptr) { - delete packed_input_; - packed_input_ = nullptr; - } - if (packed_output_ != nullptr) { - delete packed_output_; - packed_output_ = nullptr; - } - } } int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() { @@ -89,7 +75,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitBuffer() { need_align_ = true; int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4; - packed_input_ = reinterpret_cast(malloc(pack_input_size * sizeof(float))); + packed_input_ = reinterpret_cast(context_->allocator->Malloc(pack_input_size * sizeof(float))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -97,7 +83,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitBuffer() { int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM); int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4; - packed_output_ = reinterpret_cast(malloc(pack_output_size * sizeof(float))); + packed_output_ = reinterpret_cast(context_->allocator->Malloc(pack_output_size * sizeof(float))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -125,16 +111,9 @@ int ConvolutionDepthwiseSWCPUKernel::Init() { } int ConvolutionDepthwiseSWCPUKernel::ReSize() { - FreeTmpBuffer(); ConvolutionBaseCPUKernel::Init(); InitSlidingParamConvDw(sliding_, conv_param_, C4NUM); conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); - - auto ret = InitBuffer(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; - return RET_ERROR; - } return RET_OK; } @@ -155,13 +134,20 @@ int ConvDwSWRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { } int ConvolutionDepthwiseSWCPUKernel::Run() { + if (conv_param_->input_channel_ != conv_param_->output_channel_) { + MS_LOG(ERROR) << "Only support input channel equals output channel."; + return RET_ERROR; + } + auto ret = Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "Prepare failed."; return ret; } - if (conv_param_->input_channel_ != conv_param_->output_channel_) { - MS_LOG(ERROR) << "Only support input channel equals output channel."; + + ret = InitBuffer(); + if (ret != 0) { + MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; return RET_ERROR; } auto input_tensor = in_tensors_.at(kInputIndex); @@ -190,7 +176,10 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { if (need_align_) { PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + context_->allocator->Free(packed_input_); + context_->allocator->Free(packed_output_); } + return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h index 07fb16e62f..58e236efe8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h @@ -40,7 +40,6 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel { int Execute(int task_id); private: - void FreeTmpBuffer(); SlidingWindowParam *sliding_ = nullptr; float *packed_weight_ = nullptr; float *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc index 7af1563963..844a239b9f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc @@ -36,20 +36,6 @@ DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { delete packed_weight_; packed_weight_ = nullptr; } - FreeTmpBuffer(); -} - -void DeconvolutionDepthwiseCPUKernel::FreeTmpBuffer() { - if (need_align_) { - if (packed_input_ != nullptr) { - delete packed_input_; - packed_input_ = nullptr; - } - if (packed_output_ != nullptr) { - delete packed_output_; - packed_output_ = nullptr; - } - } } int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { @@ -100,7 +86,7 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() { need_align_ = true; int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM); int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4; - packed_input_ = reinterpret_cast(malloc(pack_input_size * sizeof(float))); + packed_input_ = reinterpret_cast(context_->allocator->Malloc(pack_input_size * sizeof(float))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -108,7 +94,7 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() { int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM); int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4; - packed_output_ = reinterpret_cast(malloc(pack_output_size * sizeof(float))); + packed_output_ = reinterpret_cast(context_->allocator->Malloc(pack_output_size * sizeof(float))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -137,15 +123,8 @@ int DeconvolutionDepthwiseCPUKernel::Init() { } int DeconvolutionDepthwiseCPUKernel::ReSize() { - FreeTmpBuffer(); InitSlideParam(); ConvolutionBaseCPUKernel::Init(); - - auto ret = InitBuffer(); - if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; - return ret; - } return RET_OK; } @@ -166,15 +145,23 @@ int DeconvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { } int DeconvolutionDepthwiseCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } if (conv_param_->input_channel_ != conv_param_->output_channel_) { MS_LOG(ERROR) << "Only support input channel equals output channel."; return RET_ERROR; } + + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Prepare fail!ret: " << ret; + return ret; + } + + ret = InitBuffer(); + if (ret != 0) { + MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; + return ret; + } + auto input_tensor = in_tensors_.at(kInputIndex); auto input_addr = reinterpret_cast(input_tensor->Data()); @@ -191,7 +178,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() { packed_output_ = output_addr; } - auto ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_); + ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; return RET_ERROR; @@ -200,6 +187,8 @@ int DeconvolutionDepthwiseCPUKernel::Run() { if (need_align_) { PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + context_->allocator->Free(packed_input_); + context_->allocator->Free(packed_output_); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h index 17b513d796..b1e1ab9fca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h @@ -41,7 +41,6 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { int Execute(int task_id); private: - void FreeTmpBuffer(); SlidingWindowParam *sliding_ = nullptr; float *packed_weight_ = nullptr; float *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index 33683f54cf..4cf2b00ec8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -28,19 +28,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_DepthwiseConv2D; namespace mindspore::kernel { -void ConvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { - if (packed_input_ != nullptr) { - free(packed_input_); - packed_input_ = nullptr; - } - if (need_align_) { - if (packed_output_ != nullptr) { - free(packed_output_); - packed_output_ = nullptr; - } - } -} - ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { if (sliding != nullptr) { delete sliding; @@ -50,7 +37,6 @@ ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { free(packed_weight_); packed_weight_ = nullptr; } - FreeTmpBuffer(); FreeQuantParam(); } @@ -88,7 +74,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * UP_DIV(conv_param_->input_channel_, 4); - packed_input_ = reinterpret_cast(malloc(pack_input_size * sizeof(int16_t))); + packed_input_ = reinterpret_cast(context_->allocator->Malloc(pack_input_size * sizeof(int16_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -98,7 +84,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { need_align_ = true; int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * UP_DIV(conv_param_->output_channel_, C4NUM); - packed_output_ = reinterpret_cast(malloc(pack_output_size * sizeof(int8_t))); + packed_output_ = reinterpret_cast(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -120,7 +106,6 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() { } int ConvolutionDepthwiseInt8CPUKernel::ReSize() { - FreeTmpBuffer(); ConvolutionBaseCPUKernel::Init(); InitSlidingParamConvDw(sliding, conv_param_, C4NUM); @@ -134,11 +119,6 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() { MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; return ret; } - ret = InitBuffer(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; - return ret; - } return RET_OK; } @@ -159,14 +139,20 @@ int ConvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { } int ConvolutionDepthwiseInt8CPUKernel::Run() { + if (conv_param_->input_channel_ != conv_param_->output_channel_) { + MS_LOG(ERROR) << "Only support input channel equals output channel."; + return RET_ERROR; + } auto ret = Prepare(); if (ret != RET_OK) { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - if (conv_param_->input_channel_ != conv_param_->output_channel_) { - MS_LOG(ERROR) << "Only support input channel equals output channel."; - return RET_ERROR; + + ret = InitBuffer(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; + return ret; } auto input_tensor = in_tensors_.at(kInputIndex); @@ -187,7 +173,9 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { if (need_align_) { PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + context_->allocator->Free(packed_output_); } + context_->allocator->Free(packed_input_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h index cef519df8d..57d7beac79 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h @@ -40,7 +40,6 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { int Execute(int task_id); private: - void FreeTmpBuffer(); SlidingWindowParam *sliding = nullptr; int16_t *packed_weight_ = nullptr; int16_t *packed_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc index 4f51bdc6ef..74916a7586 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc @@ -37,27 +37,9 @@ DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { delete packed_weight_; packed_weight_ = nullptr; } - FreeTmpBuffer(); FreeQuantParam(); } -void DeconvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { - if (packed_input_ != nullptr) { - delete packed_input_; - packed_input_ = nullptr; - } - if (need_align_) { - if (packed_output_ != nullptr) { - delete packed_output_; - packed_output_ = nullptr; - } - } - if (output_buffer_ != nullptr) { - delete output_buffer_; - output_buffer_ = nullptr; - } -} - int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() { // init weight: int8 -> int16 // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 @@ -111,7 +93,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitSlideParam() { int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * UP_DIV(conv_param_->input_channel_, 4); - packed_input_ = reinterpret_cast(malloc(pack_input_size * sizeof(int16_t))); + packed_input_ = reinterpret_cast(context_->allocator->Malloc(pack_input_size * sizeof(int16_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -121,7 +103,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { need_align_ = true; int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * UP_DIV(conv_param_->output_channel_, C4NUM); - packed_output_ = reinterpret_cast(malloc(pack_output_size * sizeof(int8_t))); + packed_output_ = reinterpret_cast(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -129,8 +111,8 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { memset(packed_output_, 0, pack_output_size * sizeof(int8_t)); } - output_buffer_ = - reinterpret_cast(malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t))); + output_buffer_ = reinterpret_cast( + context_->allocator->Malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t))); if (output_buffer_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -165,7 +147,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() { } int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { - FreeTmpBuffer(); InitSlideParam(); ConvolutionBaseCPUKernel::Init(); @@ -224,7 +205,10 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { if (need_align_) { PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); + context_->allocator->Free(packed_output_); } + context_->allocator->Free(packed_input_); + context_->allocator->Free(output_buffer_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h index 4437a412d0..b6ad5245ae 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h @@ -41,7 +41,6 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { int Execute(int task_id); private: - void FreeTmpBuffer(); SlidingWindowParam *sliding = nullptr; int16_t *packed_weight_ = nullptr; int16_t *packed_input_ = nullptr;