diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index b815bb31b6..32884017a5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -43,17 +43,22 @@ int Convolution1x1FP16CPUKernel::InitMatmulParam() { return RET_OK; } +Convolution1x1FP16CPUKernel::~Convolution1x1FP16CPUKernel() { + FreeTmpBuffer(); + if (weight_ptr_ != nullptr) { + free(weight_ptr_); + weight_ptr_ = nullptr; + } + if (matmul_param_ != nullptr) { + delete matmul_param_; + matmul_param_ = nullptr; + } + return; +} + int Convolution1x1FP16CPUKernel::InitConv1x1Param() { pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 || conv_param_->stride_w_ != 1); - if (pre_trans_input_) { - input_ptr_ = reinterpret_cast(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t))); - if (input_ptr_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!"; - return RET_MEMORY_FAILED; - } - memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t)); - } thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM)); thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM; @@ -74,17 +79,16 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() { MS_LOG(ERROR) << "Get Execute filter failed."; return ret; } + + bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float16_t)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; + return RET_ERROR; + } + memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float16_t)); if (in_tensors_.size() == 3) { - bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float16_t)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; - return RET_ERROR; - } - memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float16_t)); Float32ToFloat16(reinterpret_cast(in_tensors_[2]->Data()), reinterpret_cast(bias_data_), conv_param_->output_channel_); - } else { - bias_data_ = nullptr; } weight_ptr_ = reinterpret_cast(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t))); @@ -102,22 +106,19 @@ int Convolution1x1FP16CPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; } + int ret = InitWeightBias(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init weight bias failed."; + return ret; + } return ReSize(); } void Convolution1x1FP16CPUKernel::FreeTmpBuffer() { - if (weight_ptr_ != nullptr) { - free(weight_ptr_); - weight_ptr_ = nullptr; - } if (pack_input_ != nullptr) { free(pack_input_); pack_input_ = nullptr; } - if (pre_trans_input_ && input_ptr_ != nullptr) { - free(input_ptr_); - input_ptr_ = nullptr; - } return; } @@ -139,11 +140,6 @@ int Convolution1x1FP16CPUKernel::ReSize() { MS_LOG(ERROR) << "Init conv1x1 param failed."; return ret; } - ret = InitWeightBias(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init weight bias failed."; - return ret; - } return RET_OK; } @@ -197,6 +193,15 @@ int Convolution1x1FP16CPUKernel::Run() { return ret; } + if (pre_trans_input_) { + input_ptr_ = reinterpret_cast( + ctx_->allocator->Malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t))); + if (input_ptr_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!"; + return RET_MEMORY_FAILED; + } + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { Pre1x1Trans( execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, @@ -211,6 +216,11 @@ int Convolution1x1FP16CPUKernel::Run() { ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); + + if (pre_trans_input_ && input_ptr_ != nullptr) { + ctx_->allocator->Free(input_ptr_); + input_ptr_ = nullptr; + } return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h index 7a7e61a157..12aa135bfe 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h @@ -34,13 +34,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) { matmul_param_ = new MatMulParameter(); } - ~Convolution1x1FP16CPUKernel() override { - FreeTmpBuffer(); - if (matmul_param_ != nullptr) { - delete matmul_param_; - matmul_param_ = nullptr; - } - } + ~Convolution1x1FP16CPUKernel() override; int Init() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc index 4343fb4ad1..06159c73ab 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc @@ -43,11 +43,16 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() { int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto weight_data_type = weight_tensor->data_type(); + + auto input_channel = weight_tensor->Channel(); + auto output_channel = weight_tensor->Batch(); + auto kernel_h = weight_tensor->Height(); + auto kernel_w = weight_tensor->Width(); + MS_ASSERT(weight_data_type == kNumberTypeFloat32 || weight_data_type == kNumberTypeFloat16); if (weight_data_type == kNumberTypeFloat32) { float *origin_weight = reinterpret_cast(in_tensors_.at(kWeightIndex)->Data()); - size_t fp16_weight_size = conv_param_->input_channel_ * conv_param_->output_channel_ * conv_param_->kernel_h_ * - conv_param_->kernel_w_ * sizeof(float16_t); + size_t fp16_weight_size = input_channel * output_channel * kernel_h * kernel_w * sizeof(float16_t); fp16_weight_ = reinterpret_cast(malloc(fp16_weight_size)); if (fp16_weight_ == nullptr) { MS_LOG(ERROR) << "malloc fp16_weight_ failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index ac983458df..09fe8de61b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -53,18 +53,10 @@ int DeConvolutionFp16CPUKernel::ReSize() { } void DeConvolutionFp16CPUKernel::FreeParam() { - if (tmp_buffer_ != nullptr) { - free(tmp_buffer_); - tmp_buffer_ = nullptr; - } if (pack_input_ != nullptr) { free(pack_input_); pack_input_ = nullptr; } - if (pack_output_ != nullptr) { - free(pack_output_); - pack_output_ = nullptr; - } return; } @@ -107,28 +99,44 @@ int DeConvolutionFp16CPUKernel::InitParam() { thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(conv_param_->output_channel_, C8NUM)); thread_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, C8NUM), thread_count_); - pack_input_ = reinterpret_cast(malloc(row16_ * matmul_param_->deep_ * sizeof(float16_t))); + size_t size = row16_ * matmul_param_->deep_ * sizeof(float16_t); + pack_input_ = reinterpret_cast(malloc(size)); if (pack_input_ == nullptr) { MS_LOG(ERROR) << "deconv Malloc pack_input_ error!"; return RET_ERROR; } + memset(pack_input_, 0, size); + return RET_OK; +} +int DeConvolutionFp16CPUKernel::InitRunBuf() { pack_output_ = reinterpret_cast( - malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float16_t))); + ctx_->allocator->Malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float16_t))); if (pack_output_ == nullptr) { MS_LOG(ERROR) << "deconv Malloc pack_output_ error!"; return RET_NULL_PTR; } - tmp_buffer_ = reinterpret_cast(malloc(row16_ * col8_ * sizeof(float16_t))); + tmp_buffer_ = reinterpret_cast(ctx_->allocator->Malloc(row16_ * col8_ * sizeof(float16_t))); if (tmp_buffer_ == nullptr) { MS_LOG(ERROR) << "deconv Malloc tmp_buffer_ error!"; return RET_ERROR; } - return RET_OK; } +void DeConvolutionFp16CPUKernel::FreeRunBuf() { + if (tmp_buffer_ != nullptr) { + ctx_->allocator->Free(tmp_buffer_); + tmp_buffer_ = nullptr; + } + if (pack_output_ != nullptr) { + ctx_->allocator->Free(pack_output_); + pack_output_ = nullptr; + } + return; +} + int DeConvFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { auto deconv = reinterpret_cast(cdata); auto error_code = deconv->DoDeconv(task_id); @@ -171,10 +179,16 @@ int DeConvolutionFp16CPUKernel::Run() { } ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + int error_code = InitRunBuf(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; + return RET_ERROR; + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { RowMajor2Col16MajorFp16(execute_input_, pack_input_, input_plane_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(DeConvFp16Run, this, thread_count_); + error_code = LiteBackendParallelLaunch(DeConvFp16Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; return RET_ERROR; @@ -183,6 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() { ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); + FreeRunBuf(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h index 502b87a310..fd729182cd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h @@ -47,6 +47,8 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { int DoDeconv(int task_id); private: + int InitRunBuf(); + void FreeRunBuf(); void FreeParam(); int InitParam(); int InitWeightBias(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc index 3ffee2108d..30b2b6a1e3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc @@ -24,6 +24,10 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { FreeTmpBuffer(); + if (weight_ptr_ != nullptr) { + free(weight_ptr_); + weight_ptr_ = nullptr; + } if (matmul_param_ != nullptr) { delete matmul_param_; matmul_param_ = nullptr; @@ -31,18 +35,10 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { } void Convolution1x1CPUKernel::FreeTmpBuffer() { - if (weight_ptr_ != nullptr) { - free(weight_ptr_); - weight_ptr_ = nullptr; - } if (pack_input_ != nullptr) { free(pack_input_); pack_input_ = nullptr; } - if (pre_trans_input_ && input_ptr_ != nullptr) { - free(input_ptr_); - input_ptr_ = nullptr; - } return; } @@ -51,12 +47,7 @@ int Convolution1x1CPUKernel::ReSize() { ConvolutionBaseCPUKernel::Init(); InitConv1x1MatmulParam(); - int error_code = InitConv1x1BiasWeight(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "Convolution base init failed."; - return error_code; - } - error_code = InitConv1x1Param(); + int error_code = InitConv1x1Param(); if (error_code != RET_OK) { MS_LOG(ERROR) << "Convolution base init failed."; return error_code; @@ -76,40 +67,35 @@ void Convolution1x1CPUKernel::InitConv1x1MatmulParam() { } int Convolution1x1CPUKernel::InitConv1x1BiasWeight() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = filter_tensor->Channel(); + auto output_channel = filter_tensor->Batch(); + + int size = UP_ROUND(output_channel, C8NUM) * sizeof(float); + bias_data_ = malloc(size); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; + return RET_ERROR; + } + memset(bias_data_, 0, size); if (in_tensors_.size() == 3) { - bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; - return RET_ERROR; - } - memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float)); - memcpy(bias_data_, in_tensors_[2]->Data(), conv_param_->output_channel_ * sizeof(float)); - } else { - bias_data_ = nullptr; + memcpy(bias_data_, in_tensors_[kBiasIndex]->Data(), output_channel * sizeof(float)); } - weight_ptr_ = reinterpret_cast(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float))); + size = input_channel * UP_ROUND(output_channel, C8NUM) * sizeof(float); + weight_ptr_ = reinterpret_cast(malloc(size)); if (weight_ptr_ == nullptr) { MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!"; return RET_ERROR; } - memset(weight_ptr_, 0, matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float)); - RowMajor2Col8Major(reinterpret_cast(in_tensors_[1]->Data()), weight_ptr_, matmul_param_->col_, - matmul_param_->deep_); + memset(weight_ptr_, 0, size); + RowMajor2Col8Major(reinterpret_cast(filter_tensor->Data()), weight_ptr_, output_channel, input_channel); return RET_OK; } int Convolution1x1CPUKernel::InitConv1x1Param() { pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 || conv_param_->stride_w_ != 1); - if (pre_trans_input_) { - input_ptr_ = reinterpret_cast(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float))); - if (input_ptr_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!"; - return RET_MEMORY_FAILED; - } - memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)); - } thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM)); thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM; @@ -140,6 +126,12 @@ int Convolution1x1CPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; } + + int error_code = InitConv1x1BiasWeight(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Convolution base init failed."; + return error_code; + } return ReSize(); } @@ -177,6 +169,15 @@ int Convolution1x1CPUKernel::Run() { auto src_in = reinterpret_cast(in_tensors_[0]->Data()); auto src_out = reinterpret_cast(out_tensors_[0]->Data()); + if (pre_trans_input_) { + input_ptr_ = + reinterpret_cast(ctx_->allocator->Malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float))); + if (input_ptr_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!"; + return RET_MEMORY_FAILED; + } + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, src_out + batch_index * matmul_param_->row_ * matmul_param_->col_); @@ -187,6 +188,11 @@ int Convolution1x1CPUKernel::Run() { return RET_ERROR; } } + + if (pre_trans_input_) { + ctx_->allocator->Free(input_ptr_); + input_ptr_ = nullptr; + } return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc index 60da420adb..6233c633df 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc @@ -38,18 +38,10 @@ void DeConvolutionCPUKernel::FreeTmpBuffer() { free(weight_ptr_); weight_ptr_ = nullptr; } - if (tmp_buffer_ != nullptr) { - free(tmp_buffer_); - tmp_buffer_ = nullptr; - } if (pack_input_ != nullptr) { free(pack_input_); pack_input_ = nullptr; } - if (pack_output_ != nullptr) { - free(pack_output_); - pack_output_ = nullptr; - } return; } @@ -114,19 +106,6 @@ int DeConvolutionCPUKernel::InitParam() { MS_LOG(ERROR) << "deconv Malloc pack_input_ error!"; return RET_ERROR; } - - pack_output_ = - reinterpret_cast(malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float))); - if (pack_output_ == nullptr) { - MS_LOG(ERROR) << "deconv Malloc pack_output_ error!"; - return RET_NULL_PTR; - } - - tmp_buffer_ = reinterpret_cast(malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float))); - if (tmp_buffer_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!"; - return RET_ERROR; - } return RET_OK; } @@ -165,6 +144,35 @@ int DeConvolutionCPUKernel::Init() { return ReSize(); } +void DeConvolutionCPUKernel::FreeRunBuf() { + if (pack_output_ != nullptr) { + ctx_->allocator->Free(pack_output_); + pack_output_ = nullptr; + } + if (tmp_buffer_ != nullptr) { + ctx_->allocator->Free(tmp_buffer_); + tmp_buffer_ = nullptr; + } + return; +} + +int DeConvolutionCPUKernel::InitRunBuf() { + pack_output_ = reinterpret_cast( + ctx_->allocator->Malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float))); + if (pack_output_ == nullptr) { + MS_LOG(ERROR) << "deconv Malloc pack_output_ error!"; + return RET_NULL_PTR; + } + + tmp_buffer_ = + reinterpret_cast(ctx_->allocator->Malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float))); + if (tmp_buffer_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!"; + return RET_NULL_PTR; + } + return RET_OK; +} + int DeConvolutionCPUKernel::Run() { auto prepare_ret = Prepare(); if (prepare_ret != RET_OK) { @@ -174,18 +182,26 @@ int DeConvolutionCPUKernel::Run() { float *src_in = reinterpret_cast(in_tensors_[0]->Data()); float *src_out = reinterpret_cast(out_tensors_[0]->Data()); + int error_code = InitRunBuf(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; + return error_code; + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { input_ptr_ = src_in + batch_index * input_plane_ * conv_param_->input_channel_; output_ptr_ = src_out + batch_index * output_plane_ * conv_param_->output_channel_; RowMajor2Col8Major(input_ptr_, pack_input_, input_plane_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_count_); + error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; - return RET_ERROR; + return error_code; } } + + FreeRunBuf(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h index 6b441d9cb6..44c6dc70da 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h @@ -45,6 +45,8 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { int DoDeconv(int task_id); private: + int InitRunBuf(); + void FreeRunBuf(); int InitParam(); int InitWeightBias(); void FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc index 1852050237..ca69f7c20f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc @@ -37,21 +37,13 @@ void DeConvInt8CPUKernel::FreeTmpBuffer() { free(weight_ptr_); weight_ptr_ = nullptr; } - if (tmp_buffer_ != nullptr) { - free(tmp_buffer_); - tmp_buffer_ = nullptr; - } if (input_ptr_ != nullptr) { free(input_ptr_); input_ptr_ = nullptr; } - if (tmp_output_ != nullptr) { - free(tmp_output_); - tmp_output_ = nullptr; - } - if (input_sum_ != nullptr) { - free(input_sum_); - input_sum_ = nullptr; + if (weight_sum_ != nullptr) { + free(weight_sum_); + weight_sum_ = nullptr; } return; } @@ -176,21 +168,24 @@ int DeConvInt8CPUKernel::InitData() { } memset(input_ptr_, static_cast(conv_param_->conv_quant_arg_.input_quant_args_[0].zp_), size * sizeof(int8_t)); - size = UP_ROUND(conv_param_->input_h_ * conv_param_->input_w_, C4NUM) * - UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->kernel_w_ * conv_param_->kernel_h_; - tmp_buffer_ = reinterpret_cast(malloc(size * sizeof(int32_t))); + return RET_OK; +} +int DeConvInt8CPUKernel::InitRunBuf() { + int size = UP_ROUND(conv_param_->input_h_ * conv_param_->input_w_, C4NUM) * + UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->kernel_w_ * conv_param_->kernel_h_; + tmp_buffer_ = reinterpret_cast(ctx_->allocator->Malloc(size * sizeof(int32_t))); if (tmp_buffer_ == nullptr) { return RET_MEMORY_FAILED; } size = UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->output_h_ * conv_param_->output_w_; - tmp_output_ = reinterpret_cast(malloc(size * sizeof(int32_t))); + tmp_output_ = reinterpret_cast(ctx_->allocator->Malloc(size * sizeof(int32_t))); if (tmp_output_ == nullptr) { return RET_MEMORY_FAILED; } size = UP_ROUND(matmul_param_->row_, C4NUM); - input_sum_ = reinterpret_cast(malloc(size * sizeof(int32_t))); + input_sum_ = reinterpret_cast(ctx_->allocator->Malloc(size * sizeof(int32_t))); if (input_sum_ == nullptr) { return RET_MEMORY_FAILED; } @@ -198,6 +193,22 @@ int DeConvInt8CPUKernel::InitData() { return RET_OK; } +void DeConvInt8CPUKernel::FreeRunBuf() { + if (tmp_buffer_ != nullptr) { + ctx_->allocator->Free(tmp_buffer_); + tmp_buffer_ = nullptr; + } + if (tmp_output_ != nullptr) { + ctx_->allocator->Free(tmp_output_); + tmp_output_ = nullptr; + } + if (input_sum_ != nullptr) { + ctx_->allocator->Free(input_sum_); + input_sum_ = nullptr; + } + return; +} + int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { auto deconv = reinterpret_cast(cdata); auto error_code = deconv->DoDeconv(task_id); @@ -240,6 +251,12 @@ int DeConvInt8CPUKernel::Run() { int8_t *src_in = reinterpret_cast(in_tensors_[0]->Data()); int8_t *src_out = reinterpret_cast(out_tensors_[0]->Data()); + int error_code = InitRunBuf(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "deconv int8 InitRunBuf error! error_code[" << error_code << "]"; + return RET_ERROR; + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { input_trans_func_(src_in + batch_index * matmul_param_->row_ * conv_param_->input_channel_, input_ptr_, matmul_param_->row_, matmul_param_->deep_); @@ -248,13 +265,14 @@ int DeConvInt8CPUKernel::Run() { DeConvPackInputSum(input_ptr_, input_sum_, conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->row_, C4NUM), UP_ROUND(matmul_param_->deep_, C16NUM), support_optimize_); - int error_code = LiteBackendParallelLaunch(DeConvInt8Run, this, thread_count_); + error_code = LiteBackendParallelLaunch(DeConvInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]"; return RET_ERROR; } } + FreeRunBuf(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h index 30ebc9851f..1cffed22bd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h @@ -51,6 +51,8 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { int InitParam(); int InitBiasWeight(); void CheckSupportOptimize(); + int InitRunBuf(); + void FreeRunBuf(); private: int32_t *tmp_buffer_ = nullptr; /* record matmul result */