Browse Source

!11265 conv fp16 cast delete

From: @ling_qiao_min
Reviewed-by: @zhang_xue_tong
Signed-off-by: @zhang_xue_tong
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 4 years ago
parent
commit
ffa92acdef
15 changed files with 24 additions and 137 deletions
  1. +0
    -4
      mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h
  2. +2
    -11
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
  3. +4
    -34
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
  4. +0
    -4
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
  5. +2
    -8
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
  6. +3
    -9
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
  7. +3
    -10
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
  8. +2
    -10
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
  9. +3
    -9
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
  10. +0
    -3
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
  11. +0
    -3
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
  12. +5
    -27
      mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
  13. +0
    -2
      mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.h
  14. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
  15. +0
    -2
      mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h

+ 0
- 4
mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h View File

@@ -36,13 +36,9 @@ class ConcatFp16CPUKernel : public LiteKernel {
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
concat_param_ = reinterpret_cast<ConcatParameter *>(op_parameter_);
}

~ConcatFp16CPUKernel() = default;

int Init() override;

int ReSize() override;

int Run() override;

private:


+ 2
- 11
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc View File

@@ -207,18 +207,12 @@ static int Convolution1x1Fp16RunHw(void *cdata, int task_id) {
}

int Convolution1x1FP16CPUKernel::Run() {
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get executor tensor failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

pack_input_ = reinterpret_cast<float16_t *>(
ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t)));
if (pack_input_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return RET_MEMORY_FAILED;
}

@@ -232,6 +226,7 @@ int Convolution1x1FP16CPUKernel::Run() {
input_ptr_ = batch_in;
}

int ret = RET_ERROR;
if (multi_thread_by_hw_) {
ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
} else {
@@ -240,16 +235,12 @@ int Convolution1x1FP16CPUKernel::Run() {
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
ctx_->allocator->Free(pack_input_);
pack_input_ = nullptr;
return ret;
}
}

ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

ctx_->allocator->Free(pack_input_);
pack_input_ = nullptr;
return RET_OK;


+ 4
- 34
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc View File

@@ -33,19 +33,10 @@ ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() {
}

// Diff view of GetExecuteTensor() (hunk -33,19 +33,10). The +/- markers were lost, so the body
// below shows the version removed by the commit followed directly by the version that replaces
// it; as listed it is not compilable (input_tensor is declared twice).
int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
// --- Removed version (through the MallocOutputFp16 call) ---
// Recorded the data type of the input/output tensors in in_data_type_/out_data_type_, asserted
// each was fp32 or fp16, and obtained the execute buffers through the helper functions
// ConvertInputFp32toFp16 / MallocOutputFp16.
// NOTE(review): those helpers are not shown here; presumably they allocate and convert when the
// tensor is fp32 -- confirm against their definitions.
// ===================input====================//
auto input_tensor = in_tensors_.at(kInputIndex);
in_data_type_ = input_tensor->data_type();
MS_ASSERT(in_data_type_ == kNumberTypeFloat32 || in_data_type_ == kNumberTypeFloat16);

execute_input_ = ConvertInputFp32toFp16(input_tensor, context_);

// ==================output====================//
auto out_tensor = out_tensors_.at(kOutputIndex);
out_data_type_ = out_tensor->data_type();
MS_ASSERT(out_data_type_ == kNumberTypeFloat32 || out_data_type_ == kNumberTypeFloat16);

execute_output_ = MallocOutputFp16(out_tensor, context_);
// --- Added version (next four statements) ---
// Takes the first input and first output tensor and reinterprets their data_c() pointers as
// float16_t buffers; no data type is inspected and nothing is allocated in this function.
// NOTE(review): this assumes both tensors already hold fp16 data -- confirm with the caller.
auto input_tensor = in_tensors_.at(0);
auto output_tensor = out_tensors_.at(0);
execute_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
execute_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
// Both versions end here: the function always reports RET_OK.
return RET_OK;
}

@@ -78,25 +69,4 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() {
}
return RET_OK;
}

// Deleted by this commit (part of the removed lines of hunk -78,25 +69,4).
// When the recorded output data type is fp32, converts the fp16 results in execute_output_ into
// the output tensor's own buffer via Float16ToFloat32; otherwise does nothing.
void ConvolutionBaseFP16CPUKernel::IfCastOutput() {
if (out_data_type_ == kNumberTypeFloat32) {
auto out_tensor = out_tensors_.at(kOutputIndex);
// Element count of the output tensor, passed as the conversion length.
auto out_ele_num = out_tensor->ElementsNum();
// Destination of the conversion: the tensor's data viewed as float.
auto output_addr = reinterpret_cast<float *>(out_tensor->MutableData());
Float16ToFloat32(execute_output_, output_addr, out_ele_num);
}
}

// Deleted by this commit (part of the removed lines of hunk -78,25 +69,4).
// Returns execute_input_ / execute_output_ to the context allocator and resets the pointer to
// nullptr, but only for the side whose recorded data type is fp32.
// NOTE(review): presumably those are the only cases in which the buffers were temporary
// allocations rather than the tensors' own data -- confirm against the removed GetExecuteTensor.
void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() {
// Input side.
if (in_data_type_ == kNumberTypeFloat32) {
context_->allocator->Free(execute_input_);
execute_input_ = nullptr;
}
// Output side.
if (out_data_type_ == kNumberTypeFloat32) {
context_->allocator->Free(execute_output_);
execute_output_ = nullptr;
}
}

} // namespace mindspore::kernel

+ 0
- 4
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h View File

@@ -38,16 +38,12 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
int RunImpl(int task_id) { return mindspore::lite::RET_OK; }
virtual int GetExecuteTensor();
virtual int GetExecuteFilter();
virtual void IfCastOutput();
void FreeTmpBuffer();

protected:
float16_t *fp16_weight_ = nullptr;
float16_t *execute_input_ = nullptr;
float16_t *execute_weight_ = nullptr;
float16_t *execute_output_ = nullptr;
TypeId in_data_type_;
TypeId out_data_type_;
};
} // namespace mindspore::kernel



+ 2
- 8
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc View File

@@ -114,19 +114,13 @@ static int ConvDwFp16Run(void *cdata, int task_id) {
}

// Diff view of ConvolutionDepthwiseFp16CPUKernel::Run() (hunk -114,19 +114,13). The +/- markers
// were lost, so removed and added lines appear together; as listed it is not compilable
// (ret is declared twice). The comments below mark which lines belong to which side.
int ConvolutionDepthwiseFp16CPUKernel::Run() {
// Removed: GetExecuteTensor() with its result checked, logging and returning early on failure.
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
return ret;
}
// Added: the same call with its return value ignored.
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

// Removed: assignment to the `ret` declared by the removed check above.
ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
// Added: `ret` is now declared at the launch, which runs ConvDwFp16Run with thread_num_ tasks.
auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
// Unchanged: a launch failure is only logged here; the code is still returned below.
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
}

// Removed: output cast and temporary-buffer release through the base-class helpers.
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}



+ 3
- 9
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc View File

@@ -149,13 +149,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
return ret;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
FreePackedInputOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

if (need_align_) {
PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_,
conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
@@ -172,8 +167,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
}
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

FreePackedInputOutput();
return ret;
}


+ 3
- 10
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc View File

@@ -128,17 +128,11 @@ static int ConvolutionFp16Impl(void *cdata, int task_id) {
}

int ConvolutionFP16CPUKernel::Run() {
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

ret = InitTmpBuffer();
auto ret = InitTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init tmp buffer failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeTmpBuffer();
return RET_ERROR;
}
@@ -147,8 +141,7 @@ int ConvolutionFP16CPUKernel::Run() {
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
}
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

FreeTmpBuffer();
return ret;
}


+ 2
- 10
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc View File

@@ -195,17 +195,11 @@ static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) {
}

int ConvolutionWinogradFP16CPUKernel::Run() {
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

ret = InitTmpBuffer();
auto ret = InitTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init tmp buffer failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeTmpBuffer();
return RET_ERROR;
}
@@ -215,8 +209,6 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
}

ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeTmpBuffer();
return ret;
}


+ 3
- 9
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc View File

@@ -162,13 +162,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
return RET_ERROR;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
FreePackedInputOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

if (need_align_) {
PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_,
conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
@@ -189,8 +184,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
}
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

FreePackedInputOutput();
return ret;
}


+ 0
- 3
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc View File

@@ -189,7 +189,6 @@ int DeConvolutionFp16CPUKernel::Run() {
int error_code = InitRunBuf();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeRunBuf();
return RET_ERROR;
}
@@ -206,8 +205,6 @@ int DeConvolutionFp16CPUKernel::Run() {
}
}

ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeRunBuf();
return error_code;
}


+ 0
- 3
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc View File

@@ -405,9 +405,6 @@ int DeConvWinogradFp16CPUKernel::Run() {
ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp16Run, this, thread_num_hw_);
}

ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

return RET_OK;
}
} // namespace mindspore::kernel

+ 5
- 27
mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc View File

@@ -33,9 +33,6 @@ using mindspore::schema::PrimitiveType_Scale;
namespace mindspore::kernel {

int ScaleFp16CPUKernel::InitScaleOffset() {
auto input_tensor = in_tensors_.at(0);
malloc_input_ = input_tensor->data_type() == kNumberTypeFloat32;

auto scale_tensor = in_tensors_.at(1);
malloc_scale_ = scale_tensor->data_type() == kNumberTypeFloat32;

@@ -45,9 +42,6 @@ int ScaleFp16CPUKernel::InitScaleOffset() {
auto offset_tensor = in_tensors_.at(2);
malloc_offset_ = offset_tensor->data_type() == kNumberTypeFloat32;
}

auto output_tensor = out_tensors_.at(0);
malloc_output_ = output_tensor->data_type() == kNumberTypeFloat32;
return RET_OK;
}

@@ -103,6 +97,11 @@ int ScaleFp16Run(void *cdata, int task_id) {
}

int ScaleFp16CPUKernel::Run() {
auto input_tensor = in_tensors_.at(0);
auto output_tensor = out_tensors_.at(0);
input_ = reinterpret_cast<float16_t *>(input_tensor->MutableData());
output_ = reinterpret_cast<float16_t *>(output_tensor->MutableData());

auto ret = InitScaleOffset();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
@@ -123,20 +122,11 @@ int ScaleFp16CPUKernel::Run() {
return RET_ERROR;
}

// if output tensor is fp32, we need to transform
if (malloc_output_) {
auto out_tensor = out_tensors_.at(0);
Float16ToFloat32(output_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
}
FreeTmpBuffer();
return RET_OK;
}

int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
input_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
if (input_ == nullptr) {
return RET_ERROR;
}
scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
if (scale_ == nullptr) {
return RET_ERROR;
@@ -155,18 +145,10 @@ int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
}
memset(offset_, 0, in_tensors_.at(1)->ElementsNum() * sizeof(float16_t));
}
output_ = MallocOutputFp16(out_tensors_.at(0), context_);
if (output_ == nullptr) {
return RET_ERROR;
}
return RET_OK;
}

void ScaleFp16CPUKernel::FreeTmpBuffer() {
if (malloc_input_ && input_ != nullptr) {
context_->allocator->Free(input_);
input_ = nullptr;
}
if (malloc_scale_ && scale_ != nullptr) {
context_->allocator->Free(scale_);
scale_ = nullptr;
@@ -175,10 +157,6 @@ void ScaleFp16CPUKernel::FreeTmpBuffer() {
context_->allocator->Free(offset_);
offset_ = nullptr;
}
if (malloc_output_ && output_ != nullptr) {
context_->allocator->Free(output_);
output_ = nullptr;
}
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Scale, LiteKernelCreator<ScaleFp16CPUKernel>)


+ 0
- 2
mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.h View File

@@ -43,10 +43,8 @@ class ScaleFp16CPUKernel : public ScaleCPUKernel {
void FreeTmpBuffer();

private:
bool malloc_input_ = false;
bool malloc_scale_ = false;
bool malloc_offset_ = false;
bool malloc_output_ = false;

float16_t *input_ = nullptr;
float16_t *scale_ = nullptr;


+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc View File

@@ -29,7 +29,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Stack;

namespace mindspore::kernel {

int StackFp16CPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;


+ 0
- 2
mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h View File

@@ -27,9 +27,7 @@ class StackFp16CPUKernel : public StackCPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: StackCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~StackFp16CPUKernel() = default;

int Init() override;
int Run() override;



Loading…
Cancel
Save