Browse Source

!4179 [MS][LITE][Develop]malloc during runtime: conv1x1 deconv

Merge pull request !4179 from ling/conv1x1
tags/v0.7.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
8ea8c2671c
10 changed files with 196 additions and 126 deletions
  1. +39
    -29
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
  2. +1
    -7
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
  3. +7
    -2
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
  4. +28
    -13
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
  5. +2
    -0
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
  6. +41
    -35
      mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
  7. +39
    -23
      mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc
  8. +2
    -0
      mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h
  9. +35
    -17
      mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
  10. +2
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h

+ 39
- 29
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc View File

@@ -43,17 +43,22 @@ int Convolution1x1FP16CPUKernel::InitMatmulParam() {
return RET_OK; return RET_OK;
} }


// Destructor: first releases the temporary buffers through FreeTmpBuffer(),
// then the weight buffer and the matmul parameter object owned by this kernel.
Convolution1x1FP16CPUKernel::~Convolution1x1FP16CPUKernel() {
  FreeTmpBuffer();

  // free() and delete are both no-ops on a null pointer, so no explicit
  // null guards are required here.
  free(weight_ptr_);
  weight_ptr_ = nullptr;

  delete matmul_param_;
  matmul_param_ = nullptr;
}

int Convolution1x1FP16CPUKernel::InitConv1x1Param() { int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 || pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 ||
conv_param_->stride_w_ != 1); conv_param_->stride_w_ != 1);
if (pre_trans_input_) {
input_ptr_ = reinterpret_cast<float16_t *>(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t)));
if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
return RET_MEMORY_FAILED;
}
memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t));
}


thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM)); thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM));
thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM; thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM;
@@ -74,17 +79,16 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() {
MS_LOG(ERROR) << "Get Execute filter failed."; MS_LOG(ERROR) << "Get Execute filter failed.";
return ret; return ret;
} }

bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float16_t));
if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
return RET_ERROR;
}
memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float16_t));
if (in_tensors_.size() == 3) { if (in_tensors_.size() == 3) {
bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float16_t));
if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
return RET_ERROR;
}
memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float16_t));
Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->Data()), reinterpret_cast<float16_t *>(bias_data_), Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->Data()), reinterpret_cast<float16_t *>(bias_data_),
conv_param_->output_channel_); conv_param_->output_channel_);
} else {
bias_data_ = nullptr;
} }


weight_ptr_ = reinterpret_cast<float16_t *>(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t))); weight_ptr_ = reinterpret_cast<float16_t *>(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t)));
@@ -102,22 +106,19 @@ int Convolution1x1FP16CPUKernel::Init() {
if (!InferShapeDone()) { if (!InferShapeDone()) {
return RET_OK; return RET_OK;
} }
int ret = InitWeightBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init weight bias failed.";
return ret;
}
return ReSize(); return ReSize();
} }


void Convolution1x1FP16CPUKernel::FreeTmpBuffer() { void Convolution1x1FP16CPUKernel::FreeTmpBuffer() {
if (weight_ptr_ != nullptr) {
free(weight_ptr_);
weight_ptr_ = nullptr;
}
if (pack_input_ != nullptr) { if (pack_input_ != nullptr) {
free(pack_input_); free(pack_input_);
pack_input_ = nullptr; pack_input_ = nullptr;
} }
if (pre_trans_input_ && input_ptr_ != nullptr) {
free(input_ptr_);
input_ptr_ = nullptr;
}
return; return;
} }


@@ -139,11 +140,6 @@ int Convolution1x1FP16CPUKernel::ReSize() {
MS_LOG(ERROR) << "Init conv1x1 param failed."; MS_LOG(ERROR) << "Init conv1x1 param failed.";
return ret; return ret;
} }
ret = InitWeightBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init weight bias failed.";
return ret;
}
return RET_OK; return RET_OK;
} }


@@ -197,6 +193,15 @@ int Convolution1x1FP16CPUKernel::Run() {
return ret; return ret;
} }


if (pre_trans_input_) {
input_ptr_ = reinterpret_cast<float16_t *>(
ctx_->allocator->Malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float16_t)));
if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
return RET_MEMORY_FAILED;
}
}

for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
Pre1x1Trans( Pre1x1Trans(
execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_,
@@ -211,6 +216,11 @@ int Convolution1x1FP16CPUKernel::Run() {


ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

if (pre_trans_input_ && input_ptr_ != nullptr) {
ctx_->allocator->Free(input_ptr_);
input_ptr_ = nullptr;
}
return RET_OK; return RET_OK;
} }
} // namespace mindspore::kernel } // namespace mindspore::kernel

+ 1
- 7
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h View File

@@ -34,13 +34,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) { : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {
matmul_param_ = new MatMulParameter(); matmul_param_ = new MatMulParameter();
} }
~Convolution1x1FP16CPUKernel() override {
FreeTmpBuffer();
if (matmul_param_ != nullptr) {
delete matmul_param_;
matmul_param_ = nullptr;
}
}
~Convolution1x1FP16CPUKernel() override;


int Init() override; int Init() override;
int ReSize() override; int ReSize() override;


+ 7
- 2
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc View File

@@ -43,11 +43,16 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() { int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() {
auto weight_tensor = in_tensors_.at(kWeightIndex); auto weight_tensor = in_tensors_.at(kWeightIndex);
auto weight_data_type = weight_tensor->data_type(); auto weight_data_type = weight_tensor->data_type();

auto input_channel = weight_tensor->Channel();
auto output_channel = weight_tensor->Batch();
auto kernel_h = weight_tensor->Height();
auto kernel_w = weight_tensor->Width();

MS_ASSERT(weight_data_type == kNumberTypeFloat32 || weight_data_type == kNumberTypeFloat16); MS_ASSERT(weight_data_type == kNumberTypeFloat32 || weight_data_type == kNumberTypeFloat16);
if (weight_data_type == kNumberTypeFloat32) { if (weight_data_type == kNumberTypeFloat32) {
float *origin_weight = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->Data()); float *origin_weight = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->Data());
size_t fp16_weight_size = conv_param_->input_channel_ * conv_param_->output_channel_ * conv_param_->kernel_h_ *
conv_param_->kernel_w_ * sizeof(float16_t);
size_t fp16_weight_size = input_channel * output_channel * kernel_h * kernel_w * sizeof(float16_t);
fp16_weight_ = reinterpret_cast<float16_t *>(malloc(fp16_weight_size)); fp16_weight_ = reinterpret_cast<float16_t *>(malloc(fp16_weight_size));
if (fp16_weight_ == nullptr) { if (fp16_weight_ == nullptr) {
MS_LOG(ERROR) << "malloc fp16_weight_ failed."; MS_LOG(ERROR) << "malloc fp16_weight_ failed.";


+ 28
- 13
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc View File

@@ -53,18 +53,10 @@ int DeConvolutionFp16CPUKernel::ReSize() {
} }


void DeConvolutionFp16CPUKernel::FreeParam() { void DeConvolutionFp16CPUKernel::FreeParam() {
if (tmp_buffer_ != nullptr) {
free(tmp_buffer_);
tmp_buffer_ = nullptr;
}
if (pack_input_ != nullptr) { if (pack_input_ != nullptr) {
free(pack_input_); free(pack_input_);
pack_input_ = nullptr; pack_input_ = nullptr;
} }
if (pack_output_ != nullptr) {
free(pack_output_);
pack_output_ = nullptr;
}
return; return;
} }


@@ -107,28 +99,44 @@ int DeConvolutionFp16CPUKernel::InitParam() {
thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(conv_param_->output_channel_, C8NUM)); thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(conv_param_->output_channel_, C8NUM));
thread_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, C8NUM), thread_count_); thread_stride_ = UP_DIV(UP_DIV(conv_param_->output_channel_, C8NUM), thread_count_);


pack_input_ = reinterpret_cast<float16_t *>(malloc(row16_ * matmul_param_->deep_ * sizeof(float16_t)));
size_t size = row16_ * matmul_param_->deep_ * sizeof(float16_t);
pack_input_ = reinterpret_cast<float16_t *>(malloc(size));
if (pack_input_ == nullptr) { if (pack_input_ == nullptr) {
MS_LOG(ERROR) << "deconv Malloc pack_input_ error!"; MS_LOG(ERROR) << "deconv Malloc pack_input_ error!";
return RET_ERROR; return RET_ERROR;
} }
memset(pack_input_, 0, size);
return RET_OK;
}


int DeConvolutionFp16CPUKernel::InitRunBuf() {
pack_output_ = reinterpret_cast<float16_t *>( pack_output_ = reinterpret_cast<float16_t *>(
malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float16_t)));
ctx_->allocator->Malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float16_t)));
if (pack_output_ == nullptr) { if (pack_output_ == nullptr) {
MS_LOG(ERROR) << "deconv Malloc pack_output_ error!"; MS_LOG(ERROR) << "deconv Malloc pack_output_ error!";
return RET_NULL_PTR; return RET_NULL_PTR;
} }


tmp_buffer_ = reinterpret_cast<float16_t *>(malloc(row16_ * col8_ * sizeof(float16_t)));
tmp_buffer_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(row16_ * col8_ * sizeof(float16_t)));
if (tmp_buffer_ == nullptr) { if (tmp_buffer_ == nullptr) {
MS_LOG(ERROR) << "deconv Malloc tmp_buffer_ error!"; MS_LOG(ERROR) << "deconv Malloc tmp_buffer_ error!";
return RET_ERROR; return RET_ERROR;
} }

return RET_OK; return RET_OK;
} }


// Hands the per-run scratch buffers (tmp_buffer_ and pack_output_) back to the
// context allocator. Each pointer is reset to nullptr after it is released, so
// calling this again without a new allocation does nothing.
void DeConvolutionFp16CPUKernel::FreeRunBuf() {
  if (nullptr != tmp_buffer_) {
    ctx_->allocator->Free(tmp_buffer_);
    tmp_buffer_ = nullptr;
  }

  if (nullptr != pack_output_) {
    ctx_->allocator->Free(pack_output_);
    pack_output_ = nullptr;
  }
}

int DeConvFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { int DeConvFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
auto deconv = reinterpret_cast<DeConvolutionFp16CPUKernel *>(cdata); auto deconv = reinterpret_cast<DeConvolutionFp16CPUKernel *>(cdata);
auto error_code = deconv->DoDeconv(task_id); auto error_code = deconv->DoDeconv(task_id);
@@ -171,10 +179,16 @@ int DeConvolutionFp16CPUKernel::Run() {
} }
ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); ConvolutionBaseFP16CPUKernel::GetExecuteTensor();


int error_code = InitRunBuf();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
return RET_ERROR;
}

for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
RowMajor2Col16MajorFp16(execute_input_, pack_input_, input_plane_, conv_param_->input_channel_); RowMajor2Col16MajorFp16(execute_input_, pack_input_, input_plane_, conv_param_->input_channel_);


int error_code = LiteBackendParallelLaunch(DeConvFp16Run, this, thread_count_);
error_code = LiteBackendParallelLaunch(DeConvFp16Run, this, thread_count_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
return RET_ERROR; return RET_ERROR;
@@ -183,6 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() {


ConvolutionBaseFP16CPUKernel::IfCastOutput(); ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeRunBuf();


return RET_OK; return RET_OK;
} }


+ 2
- 0
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h View File

@@ -47,6 +47,8 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
int DoDeconv(int task_id); int DoDeconv(int task_id);


private: private:
int InitRunBuf();
void FreeRunBuf();
void FreeParam(); void FreeParam();
int InitParam(); int InitParam();
int InitWeightBias(); int InitWeightBias();


+ 41
- 35
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc View File

@@ -24,6 +24,10 @@ using mindspore::lite::RET_OK;
namespace mindspore::kernel { namespace mindspore::kernel {
Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
FreeTmpBuffer(); FreeTmpBuffer();
if (weight_ptr_ != nullptr) {
free(weight_ptr_);
weight_ptr_ = nullptr;
}
if (matmul_param_ != nullptr) { if (matmul_param_ != nullptr) {
delete matmul_param_; delete matmul_param_;
matmul_param_ = nullptr; matmul_param_ = nullptr;
@@ -31,18 +35,10 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
} }


void Convolution1x1CPUKernel::FreeTmpBuffer() { void Convolution1x1CPUKernel::FreeTmpBuffer() {
if (weight_ptr_ != nullptr) {
free(weight_ptr_);
weight_ptr_ = nullptr;
}
if (pack_input_ != nullptr) { if (pack_input_ != nullptr) {
free(pack_input_); free(pack_input_);
pack_input_ = nullptr; pack_input_ = nullptr;
} }
if (pre_trans_input_ && input_ptr_ != nullptr) {
free(input_ptr_);
input_ptr_ = nullptr;
}
return; return;
} }


@@ -51,12 +47,7 @@ int Convolution1x1CPUKernel::ReSize() {
ConvolutionBaseCPUKernel::Init(); ConvolutionBaseCPUKernel::Init();
InitConv1x1MatmulParam(); InitConv1x1MatmulParam();


int error_code = InitConv1x1BiasWeight();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Convolution base init failed.";
return error_code;
}
error_code = InitConv1x1Param();
int error_code = InitConv1x1Param();
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "Convolution base init failed."; MS_LOG(ERROR) << "Convolution base init failed.";
return error_code; return error_code;
@@ -76,40 +67,35 @@ void Convolution1x1CPUKernel::InitConv1x1MatmulParam() {
} }


int Convolution1x1CPUKernel::InitConv1x1BiasWeight() { int Convolution1x1CPUKernel::InitConv1x1BiasWeight() {
auto filter_tensor = in_tensors_.at(kWeightIndex);
auto input_channel = filter_tensor->Channel();
auto output_channel = filter_tensor->Batch();

int size = UP_ROUND(output_channel, C8NUM) * sizeof(float);
bias_data_ = malloc(size);
if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
return RET_ERROR;
}
memset(bias_data_, 0, size);
if (in_tensors_.size() == 3) { if (in_tensors_.size() == 3) {
bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float));
if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
return RET_ERROR;
}
memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float));
memcpy(bias_data_, in_tensors_[2]->Data(), conv_param_->output_channel_ * sizeof(float));
} else {
bias_data_ = nullptr;
memcpy(bias_data_, in_tensors_[kBiasIndex]->Data(), output_channel * sizeof(float));
} }


weight_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float)));
size = input_channel * UP_ROUND(output_channel, C8NUM) * sizeof(float);
weight_ptr_ = reinterpret_cast<float *>(malloc(size));
if (weight_ptr_ == nullptr) { if (weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!"; MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
return RET_ERROR; return RET_ERROR;
} }
memset(weight_ptr_, 0, matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float));
RowMajor2Col8Major(reinterpret_cast<float *>(in_tensors_[1]->Data()), weight_ptr_, matmul_param_->col_,
matmul_param_->deep_);
memset(weight_ptr_, 0, size);
RowMajor2Col8Major(reinterpret_cast<float *>(filter_tensor->Data()), weight_ptr_, output_channel, input_channel);
return RET_OK; return RET_OK;
} }


int Convolution1x1CPUKernel::InitConv1x1Param() { int Convolution1x1CPUKernel::InitConv1x1Param() {
pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 || pre_trans_input_ = (conv_param_->pad_h_ != 0 || conv_param_->pad_w_ != 0 || conv_param_->stride_h_ != 1 ||
conv_param_->stride_w_ != 1); conv_param_->stride_w_ != 1);
if (pre_trans_input_) {
input_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)));
if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
return RET_MEMORY_FAILED;
}
memset(input_ptr_, 0, matmul_param_->row_ * matmul_param_->deep_ * sizeof(float));
}


thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM)); thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM));
thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM; thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM;
@@ -140,6 +126,12 @@ int Convolution1x1CPUKernel::Init() {
if (!InferShapeDone()) { if (!InferShapeDone()) {
return RET_OK; return RET_OK;
} }

int error_code = InitConv1x1BiasWeight();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Convolution base init failed.";
return error_code;
}
return ReSize(); return ReSize();
} }


@@ -177,6 +169,15 @@ int Convolution1x1CPUKernel::Run() {
auto src_in = reinterpret_cast<float *>(in_tensors_[0]->Data()); auto src_in = reinterpret_cast<float *>(in_tensors_[0]->Data());
auto src_out = reinterpret_cast<float *>(out_tensors_[0]->Data()); auto src_out = reinterpret_cast<float *>(out_tensors_[0]->Data());


if (pre_trans_input_) {
input_ptr_ =
reinterpret_cast<float *>(ctx_->allocator->Malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)));
if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc input_ptr_ error!";
return RET_MEMORY_FAILED;
}
}

for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_,
src_out + batch_index * matmul_param_->row_ * matmul_param_->col_); src_out + batch_index * matmul_param_->row_ * matmul_param_->col_);
@@ -187,6 +188,11 @@ int Convolution1x1CPUKernel::Run() {
return RET_ERROR; return RET_ERROR;
} }
} }

if (pre_trans_input_) {
ctx_->allocator->Free(input_ptr_);
input_ptr_ = nullptr;
}
return RET_OK; return RET_OK;
} }
} // namespace mindspore::kernel } // namespace mindspore::kernel

+ 39
- 23
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc View File

@@ -38,18 +38,10 @@ void DeConvolutionCPUKernel::FreeTmpBuffer() {
free(weight_ptr_); free(weight_ptr_);
weight_ptr_ = nullptr; weight_ptr_ = nullptr;
} }
if (tmp_buffer_ != nullptr) {
free(tmp_buffer_);
tmp_buffer_ = nullptr;
}
if (pack_input_ != nullptr) { if (pack_input_ != nullptr) {
free(pack_input_); free(pack_input_);
pack_input_ = nullptr; pack_input_ = nullptr;
} }
if (pack_output_ != nullptr) {
free(pack_output_);
pack_output_ = nullptr;
}
return; return;
} }


@@ -114,19 +106,6 @@ int DeConvolutionCPUKernel::InitParam() {
MS_LOG(ERROR) << "deconv Malloc pack_input_ error!"; MS_LOG(ERROR) << "deconv Malloc pack_input_ error!";
return RET_ERROR; return RET_ERROR;
} }

pack_output_ =
reinterpret_cast<float *>(malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float)));
if (pack_output_ == nullptr) {
MS_LOG(ERROR) << "deconv Malloc pack_output_ error!";
return RET_NULL_PTR;
}

tmp_buffer_ = reinterpret_cast<float *>(malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float)));
if (tmp_buffer_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }


@@ -165,6 +144,35 @@ int DeConvolutionCPUKernel::Init() {
return ReSize(); return ReSize();
} }


// Hands the per-run scratch buffers (pack_output_ and tmp_buffer_) back to the
// context allocator. Each pointer is reset to nullptr after it is released, so
// calling this again without a new allocation does nothing.
void DeConvolutionCPUKernel::FreeRunBuf() {
  if (nullptr != pack_output_) {
    ctx_->allocator->Free(pack_output_);
    pack_output_ = nullptr;
  }

  if (nullptr != tmp_buffer_) {
    ctx_->allocator->Free(tmp_buffer_);
    tmp_buffer_ = nullptr;
  }
}

int DeConvolutionCPUKernel::InitRunBuf() {
pack_output_ = reinterpret_cast<float *>(
ctx_->allocator->Malloc(UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float)));
if (pack_output_ == nullptr) {
MS_LOG(ERROR) << "deconv Malloc pack_output_ error!";
return RET_NULL_PTR;
}

tmp_buffer_ =
reinterpret_cast<float *>(ctx_->allocator->Malloc(matmul_param_->row_8_ * matmul_param_->col_8_ * sizeof(float)));
if (tmp_buffer_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc tmp_buffer_ error!";
return RET_NULL_PTR;
}
return RET_OK;
}

int DeConvolutionCPUKernel::Run() { int DeConvolutionCPUKernel::Run() {
auto prepare_ret = Prepare(); auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) { if (prepare_ret != RET_OK) {
@@ -174,18 +182,26 @@ int DeConvolutionCPUKernel::Run() {
float *src_in = reinterpret_cast<float *>(in_tensors_[0]->Data()); float *src_in = reinterpret_cast<float *>(in_tensors_[0]->Data());
float *src_out = reinterpret_cast<float *>(out_tensors_[0]->Data()); float *src_out = reinterpret_cast<float *>(out_tensors_[0]->Data());


int error_code = InitRunBuf();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
return error_code;
}

for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
input_ptr_ = src_in + batch_index * input_plane_ * conv_param_->input_channel_; input_ptr_ = src_in + batch_index * input_plane_ * conv_param_->input_channel_;
output_ptr_ = src_out + batch_index * output_plane_ * conv_param_->output_channel_; output_ptr_ = src_out + batch_index * output_plane_ * conv_param_->output_channel_;


RowMajor2Col8Major(input_ptr_, pack_input_, input_plane_, conv_param_->input_channel_); RowMajor2Col8Major(input_ptr_, pack_input_, input_plane_, conv_param_->input_channel_);


int error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_count_);
error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_count_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
return RET_ERROR;
return error_code;
} }
} }

FreeRunBuf();
return RET_OK; return RET_OK;
} }




+ 2
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.h View File

@@ -45,6 +45,8 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
int DoDeconv(int task_id); int DoDeconv(int task_id);


private: private:
int InitRunBuf();
void FreeRunBuf();
int InitParam(); int InitParam();
int InitWeightBias(); int InitWeightBias();
void FreeTmpBuffer(); void FreeTmpBuffer();


+ 35
- 17
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc View File

@@ -37,21 +37,13 @@ void DeConvInt8CPUKernel::FreeTmpBuffer() {
free(weight_ptr_); free(weight_ptr_);
weight_ptr_ = nullptr; weight_ptr_ = nullptr;
} }
if (tmp_buffer_ != nullptr) {
free(tmp_buffer_);
tmp_buffer_ = nullptr;
}
if (input_ptr_ != nullptr) { if (input_ptr_ != nullptr) {
free(input_ptr_); free(input_ptr_);
input_ptr_ = nullptr; input_ptr_ = nullptr;
} }
if (tmp_output_ != nullptr) {
free(tmp_output_);
tmp_output_ = nullptr;
}
if (input_sum_ != nullptr) {
free(input_sum_);
input_sum_ = nullptr;
if (weight_sum_ != nullptr) {
free(weight_sum_);
weight_sum_ = nullptr;
} }
return; return;
} }
@@ -176,21 +168,24 @@ int DeConvInt8CPUKernel::InitData() {
} }
memset(input_ptr_, static_cast<int8_t>(conv_param_->conv_quant_arg_.input_quant_args_[0].zp_), size * sizeof(int8_t)); memset(input_ptr_, static_cast<int8_t>(conv_param_->conv_quant_arg_.input_quant_args_[0].zp_), size * sizeof(int8_t));


size = UP_ROUND(conv_param_->input_h_ * conv_param_->input_w_, C4NUM) *
UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->kernel_w_ * conv_param_->kernel_h_;
tmp_buffer_ = reinterpret_cast<int32_t *>(malloc(size * sizeof(int32_t)));
return RET_OK;
}
int DeConvInt8CPUKernel::InitRunBuf() {
int size = UP_ROUND(conv_param_->input_h_ * conv_param_->input_w_, C4NUM) *
UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->kernel_w_ * conv_param_->kernel_h_;
tmp_buffer_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(size * sizeof(int32_t)));
if (tmp_buffer_ == nullptr) { if (tmp_buffer_ == nullptr) {
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }


size = UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->output_h_ * conv_param_->output_w_; size = UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->output_h_ * conv_param_->output_w_;
tmp_output_ = reinterpret_cast<int32_t *>(malloc(size * sizeof(int32_t)));
tmp_output_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(size * sizeof(int32_t)));
if (tmp_output_ == nullptr) { if (tmp_output_ == nullptr) {
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }


size = UP_ROUND(matmul_param_->row_, C4NUM); size = UP_ROUND(matmul_param_->row_, C4NUM);
input_sum_ = reinterpret_cast<int32_t *>(malloc(size * sizeof(int32_t)));
input_sum_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(size * sizeof(int32_t)));
if (input_sum_ == nullptr) { if (input_sum_ == nullptr) {
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
@@ -198,6 +193,22 @@ int DeConvInt8CPUKernel::InitData() {
return RET_OK; return RET_OK;
} }


// Hands the per-run scratch buffers (tmp_buffer_, tmp_output_ and input_sum_)
// back to the context allocator. Each pointer is reset to nullptr after it is
// released, so calling this again without a new allocation does nothing.
void DeConvInt8CPUKernel::FreeRunBuf() {
  if (nullptr != tmp_buffer_) {
    ctx_->allocator->Free(tmp_buffer_);
    tmp_buffer_ = nullptr;
  }

  if (nullptr != tmp_output_) {
    ctx_->allocator->Free(tmp_output_);
    tmp_output_ = nullptr;
  }

  if (nullptr != input_sum_) {
    ctx_->allocator->Free(input_sum_);
    input_sum_ = nullptr;
  }
}

int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
auto deconv = reinterpret_cast<DeConvInt8CPUKernel *>(cdata); auto deconv = reinterpret_cast<DeConvInt8CPUKernel *>(cdata);
auto error_code = deconv->DoDeconv(task_id); auto error_code = deconv->DoDeconv(task_id);
@@ -240,6 +251,12 @@ int DeConvInt8CPUKernel::Run() {
int8_t *src_in = reinterpret_cast<int8_t *>(in_tensors_[0]->Data()); int8_t *src_in = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
int8_t *src_out = reinterpret_cast<int8_t *>(out_tensors_[0]->Data()); int8_t *src_out = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());


int error_code = InitRunBuf();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv int8 InitRunBuf error! error_code[" << error_code << "]";
return RET_ERROR;
}

for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
input_trans_func_(src_in + batch_index * matmul_param_->row_ * conv_param_->input_channel_, input_ptr_, input_trans_func_(src_in + batch_index * matmul_param_->row_ * conv_param_->input_channel_, input_ptr_,
matmul_param_->row_, matmul_param_->deep_); matmul_param_->row_, matmul_param_->deep_);
@@ -248,13 +265,14 @@ int DeConvInt8CPUKernel::Run() {
DeConvPackInputSum(input_ptr_, input_sum_, conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, DeConvPackInputSum(input_ptr_, input_sum_, conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_,
UP_ROUND(matmul_param_->row_, C4NUM), UP_ROUND(matmul_param_->deep_, C16NUM), support_optimize_); UP_ROUND(matmul_param_->row_, C4NUM), UP_ROUND(matmul_param_->deep_, C16NUM), support_optimize_);


int error_code = LiteBackendParallelLaunch(DeConvInt8Run, this, thread_count_);
error_code = LiteBackendParallelLaunch(DeConvInt8Run, this, thread_count_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]"; MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]";
return RET_ERROR; return RET_ERROR;
} }
} }


FreeRunBuf();
return RET_OK; return RET_OK;
} }




+ 2
- 0
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h View File

@@ -51,6 +51,8 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel {
int InitParam(); int InitParam();
int InitBiasWeight(); int InitBiasWeight();
void CheckSupportOptimize(); void CheckSupportOptimize();
int InitRunBuf();
void FreeRunBuf();


private: private:
int32_t *tmp_buffer_ = nullptr; /* record matmul result */ int32_t *tmp_buffer_ = nullptr; /* record matmul result */


Loading…
Cancel
Save