Browse Source

!5274 [MS][LITE] arm cpu op conv depthwise, deconv depthwise: malloc input and output temp buffer using allocator

Merge pull request !5274 from yangruoqi713/lite
tags/v1.0.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
275e286216
14 changed files with 84 additions and 165 deletions
  1. +4
    -4
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
  2. +12
    -25
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
  3. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
  4. +11
    -24
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
  5. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
  6. +5
    -4
      mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
  7. +14
    -25
      mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc
  8. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h
  9. +18
    -29
      mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc
  10. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h
  11. +13
    -25
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
  12. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h
  13. +7
    -23
      mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc
  14. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h

+ 4
- 4
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc View File

@@ -109,15 +109,15 @@ static int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwiseFp16CPUKernel::Run() {
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {


+ 12
- 25
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc View File

@@ -38,20 +38,6 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
delete packed_weight_;
packed_weight_ = nullptr;
}
FreeTmpBuffer();
}

// Releases the temporary packed input/output buffers and resets the pointers.
// Nothing is released unless need_align_ is set. The buffers are obtained with
// malloc() in InitBuffer(), so they are released with free(); using delete on
// malloc'd memory is a mismatched deallocation.
void ConvolutionDepthwiseSWFp16CPUKernel::FreeTmpBuffer() {
  if (!need_align_) {
    return;
  }
  // free(nullptr) is a no-op, so explicit null checks are unnecessary.
  free(packed_input_);
  packed_input_ = nullptr;
  free(packed_output_);
  packed_output_ = nullptr;
}

int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
@@ -59,14 +45,14 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
need_align_ = true;
int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8;
packed_input_ = reinterpret_cast<float16_t *>(malloc(pack_input_size * sizeof(float16_t)));
packed_input_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(float16_t)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}

int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8;
packed_output_ = reinterpret_cast<float16_t *>(malloc(pack_output_size * sizeof(float16_t)));
packed_output_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_output_size * sizeof(float16_t)));
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -129,18 +115,11 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Init() {
}

// Rebuilds the kernel's temporary state: frees the old temporary buffers,
// re-runs ConvolutionBaseCPUKernel::Init(), calls InitSlidingParamConvDw() with
// a C8NUM block size and finally calls InitBuffer().
// Returns the base Init() code unchanged on failure, RET_ERROR when
// InitBuffer() fails, RET_OK otherwise.
int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() {
  FreeTmpBuffer();

  const auto base_status = ConvolutionBaseCPUKernel::Init();
  if (base_status != RET_OK) {
    return base_status;
  }

  InitSlidingParamConvDw(sliding_, conv_param_, C8NUM);

  if (InitBuffer() != 0) {
    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

@@ -161,13 +140,19 @@ static int ConvDwSWFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata)
}

int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}

auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
return RET_ERROR;
}

@@ -194,6 +179,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
if (need_align_) {
PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
}
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();


+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h View File

@@ -50,7 +50,6 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
int Execute(int task_id);

private:
void FreeTmpBuffer();
SlidingWindowParam *sliding_ = nullptr;
float16_t *packed_weight_ = nullptr;
float16_t *packed_input_ = nullptr;


+ 11
- 24
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc View File

@@ -37,20 +37,6 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() {
delete packed_weight_;
packed_weight_ = nullptr;
}
FreeTmpBuffer();
}

// Releases the temporary packed input/output buffers and resets the pointers.
// Nothing is released unless need_align_ is set. The buffers are obtained with
// malloc() in InitBuffer(), so they are released with free(); using delete on
// malloc'd memory is a mismatched deallocation.
void DeconvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() {
  if (!need_align_) {
    return;
  }
  // free(nullptr) is a no-op, so explicit null checks are unnecessary.
  free(packed_input_);
  packed_input_ = nullptr;
  free(packed_output_);
  packed_output_ = nullptr;
}

int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
@@ -69,14 +55,14 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() {
int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * C8;
packed_input_ = reinterpret_cast<float16_t *>(malloc(pack_input_size * sizeof(float16_t)));
packed_input_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(float16_t)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}

int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * C8;
packed_output_ = reinterpret_cast<float16_t *>(malloc(pack_output_size * sizeof(float16_t)));
packed_output_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_output_size * sizeof(float16_t)));
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -137,17 +123,11 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() {
}

int DeconvolutionDepthwiseFp16CPUKernel::ReSize() {
FreeTmpBuffer();
InitSlideParam();
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
return RET_ERROR;
}
return RET_OK;
}

@@ -168,13 +148,18 @@ static int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata)
}

int DeconvolutionDepthwiseFp16CPUKernel::Run() {
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
return RET_ERROR;
}

@@ -202,6 +187,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
if (need_align_) {
PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
}
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();


+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h View File

@@ -52,7 +52,6 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
int Execute(int task_id);

private:
void FreeTmpBuffer();
SlidingWindowParam *sliding_ = nullptr;
float16_t *packed_weight_ = nullptr;
float16_t *packed_input_ = nullptr;


+ 5
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc View File

@@ -100,15 +100,16 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwiseCPUKernel::Run() {
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}

auto input_tensor = in_tensors_.at(kInputIndex);
input_ptr_ = reinterpret_cast<float *>(input_tensor->Data());



+ 14
- 25
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc View File

@@ -36,20 +36,6 @@ ConvolutionDepthwiseSWCPUKernel::~ConvolutionDepthwiseSWCPUKernel() {
delete packed_weight_;
packed_weight_ = nullptr;
}
FreeTmpBuffer();
}

// Releases the temporary packed input/output buffers and resets the pointers.
// Nothing is released unless need_align_ is set. The buffers are obtained with
// malloc() in InitBuffer(), so they are released with free(); using delete on
// malloc'd memory is a mismatched deallocation.
void ConvolutionDepthwiseSWCPUKernel::FreeTmpBuffer() {
  if (!need_align_) {
    return;
  }
  // free(nullptr) is a no-op, so explicit null checks are unnecessary.
  free(packed_input_);
  packed_input_ = nullptr;
  free(packed_output_);
  packed_output_ = nullptr;
}

int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() {
@@ -89,7 +75,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitBuffer() {
need_align_ = true;
int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4;
packed_input_ = reinterpret_cast<float *>(malloc(pack_input_size * sizeof(float)));
packed_input_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_input_size * sizeof(float)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -97,7 +83,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitBuffer() {

int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4;
packed_output_ = reinterpret_cast<float *>(malloc(pack_output_size * sizeof(float)));
packed_output_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_output_size * sizeof(float)));
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -125,16 +111,9 @@ int ConvolutionDepthwiseSWCPUKernel::Init() {
}

// Rebuilds the kernel's temporary state: frees the old temporary buffers,
// re-runs ConvolutionBaseCPUKernel::Init(), calls InitSlidingParamConvDw() with
// a C4NUM block size, caps thread_num_ at output_h_ and calls InitBuffer().
// Returns the base Init() code unchanged on failure, RET_ERROR when
// InitBuffer() fails, RET_OK otherwise.
int ConvolutionDepthwiseSWCPUKernel::ReSize() {
  FreeTmpBuffer();

  // Propagate a base-class Init() failure instead of silently ignoring it,
  // matching the fp16 sliding-window kernel's ReSize().
  auto ret = ConvolutionBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }

  InitSlidingParamConvDw(sliding_, conv_param_, C4NUM);
  conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_);

  ret = InitBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

@@ -155,13 +134,20 @@ int ConvDwSWRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwiseSWCPUKernel::Run() {
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}

auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";

ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
return RET_ERROR;
}
auto input_tensor = in_tensors_.at(kInputIndex);
@@ -190,7 +176,10 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
if (need_align_) {
PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
}

return RET_OK;
}
} // namespace mindspore::kernel

+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.h View File

@@ -40,7 +40,6 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
int Execute(int task_id);

private:
void FreeTmpBuffer();
SlidingWindowParam *sliding_ = nullptr;
float *packed_weight_ = nullptr;
float *packed_input_ = nullptr;


+ 18
- 29
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc View File

@@ -36,20 +36,6 @@ DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() {
delete packed_weight_;
packed_weight_ = nullptr;
}
FreeTmpBuffer();
}

// Releases the temporary packed input/output buffers and resets the pointers.
// Nothing is released unless need_align_ is set. The buffers are obtained with
// malloc() in InitBuffer(), so they are released with free(); using delete on
// malloc'd memory is a mismatched deallocation.
void DeconvolutionDepthwiseCPUKernel::FreeTmpBuffer() {
  if (!need_align_) {
    return;
  }
  // free(nullptr) is a no-op, so explicit null checks are unnecessary.
  free(packed_input_);
  packed_input_ = nullptr;
  free(packed_output_);
  packed_output_ = nullptr;
}

int DeconvolutionDepthwiseCPUKernel::InitSlideParam() {
@@ -100,7 +86,7 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() {
need_align_ = true;
int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM * IC4;
packed_input_ = reinterpret_cast<float *>(malloc(pack_input_size * sizeof(float)));
packed_input_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_input_size * sizeof(float)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -108,7 +94,7 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() {

int OC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * OC4;
packed_output_ = reinterpret_cast<float *>(malloc(pack_output_size * sizeof(float)));
packed_output_ = reinterpret_cast<float *>(context_->allocator->Malloc(pack_output_size * sizeof(float)));
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -137,15 +123,8 @@ int DeconvolutionDepthwiseCPUKernel::Init() {
}

int DeconvolutionDepthwiseCPUKernel::ReSize() {
FreeTmpBuffer();
InitSlideParam();
ConvolutionBaseCPUKernel::Init();

auto ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
return ret;
}
return RET_OK;
}

@@ -166,15 +145,23 @@ int DeconvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int DeconvolutionDepthwiseCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}

auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}

ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
return ret;
}

auto input_tensor = in_tensors_.at(kInputIndex);
auto input_addr = reinterpret_cast<float *>(input_tensor->Data());

@@ -191,7 +178,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
packed_output_ = output_addr;
}

auto ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_);
ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
return RET_ERROR;
@@ -200,6 +187,8 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
if (need_align_) {
PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
}
return RET_OK;
}


+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.h View File

@@ -41,7 +41,6 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
int Execute(int task_id);

private:
void FreeTmpBuffer();
SlidingWindowParam *sliding_ = nullptr;
float *packed_weight_ = nullptr;
float *packed_input_ = nullptr;


+ 13
- 25
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc View File

@@ -28,19 +28,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DepthwiseConv2D;

namespace mindspore::kernel {
// Releases the temporary buffers and resets the pointers: packed_input_ is
// always freed, packed_output_ only when need_align_ is set.
void ConvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() {
  // free(nullptr) is a no-op, so the pointers need no explicit null checks.
  free(packed_input_);
  packed_input_ = nullptr;

  if (!need_align_) {
    return;
  }
  free(packed_output_);
  packed_output_ = nullptr;
}

ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() {
if (sliding != nullptr) {
delete sliding;
@@ -50,7 +37,6 @@ ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() {
free(packed_weight_);
packed_weight_ = nullptr;
}
FreeTmpBuffer();
FreeQuantParam();
}

@@ -88,7 +74,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() {
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM *
UP_DIV(conv_param_->input_channel_, 4);
packed_input_ = reinterpret_cast<int16_t *>(malloc(pack_input_size * sizeof(int16_t)));
packed_input_ = reinterpret_cast<int16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(int16_t)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -98,7 +84,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() {
need_align_ = true;
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM *
UP_DIV(conv_param_->output_channel_, C4NUM);
packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t)));
packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -120,7 +106,6 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() {
}

int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
FreeTmpBuffer();
ConvolutionBaseCPUKernel::Init();
InitSlidingParamConvDw(sliding, conv_param_, C4NUM);

@@ -134,11 +119,6 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
return ret;
}
ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
return ret;
}
return RET_OK;
}

@@ -159,14 +139,20 @@ int ConvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwiseInt8CPUKernel::Run() {
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;

ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
return ret;
}

auto input_tensor = in_tensors_.at(kInputIndex);
@@ -187,7 +173,9 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() {
if (need_align_) {
PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_output_);
}
context_->allocator->Free(packed_input_);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h View File

@@ -40,7 +40,6 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel {
int Execute(int task_id);

private:
void FreeTmpBuffer();
SlidingWindowParam *sliding = nullptr;
int16_t *packed_weight_ = nullptr;
int16_t *packed_input_ = nullptr;


+ 7
- 23
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc View File

@@ -37,27 +37,9 @@ DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() {
delete packed_weight_;
packed_weight_ = nullptr;
}
FreeTmpBuffer();
FreeQuantParam();
}

// Releases the temporary buffers and resets the pointers: packed_input_ and
// output_buffer_ are always freed, packed_output_ only when need_align_ is set.
// The buffers are obtained with malloc() in InitBuffer(), so they are released
// with free(); using delete on malloc'd memory is a mismatched deallocation.
void DeconvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() {
  // free(nullptr) is a no-op, so the pointers need no explicit null checks.
  free(packed_input_);
  packed_input_ = nullptr;

  if (need_align_) {
    free(packed_output_);
    packed_output_ = nullptr;
  }

  free(output_buffer_);
  output_buffer_ = nullptr;
}

int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
// init weight: int8 -> int16
// o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1
@@ -111,7 +93,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitSlideParam() {
int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() {
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM *
UP_DIV(conv_param_->input_channel_, 4);
packed_input_ = reinterpret_cast<int16_t *>(malloc(pack_input_size * sizeof(int16_t)));
packed_input_ = reinterpret_cast<int16_t *>(context_->allocator->Malloc(pack_input_size * sizeof(int16_t)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -121,7 +103,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() {
need_align_ = true;
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM *
UP_DIV(conv_param_->output_channel_, C4NUM);
packed_output_ = reinterpret_cast<int8_t *>(malloc(pack_output_size * sizeof(int8_t)));
packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t)));
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -129,8 +111,8 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() {
memset(packed_output_, 0, pack_output_size * sizeof(int8_t));
}

output_buffer_ =
reinterpret_cast<int32_t *>(malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t)));
output_buffer_ = reinterpret_cast<int32_t *>(
context_->allocator->Malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t)));
if (output_buffer_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@@ -165,7 +147,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() {
}

int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
FreeTmpBuffer();
InitSlideParam();
ConvolutionBaseCPUKernel::Init();

@@ -224,7 +205,10 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
if (need_align_) {
PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_output_);
}
context_->allocator->Free(packed_input_);
context_->allocator->Free(output_buffer_);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h View File

@@ -41,7 +41,6 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel {
int Execute(int task_id);

private:
void FreeTmpBuffer();
SlidingWindowParam *sliding = nullptr;
int16_t *packed_weight_ = nullptr;
int16_t *packed_input_ = nullptr;


Loading…
Cancel
Save