From 0fac817a2d49698aee7badbc9ee2772e755f8c6c Mon Sep 17 00:00:00 2001 From: ling Date: Sat, 22 Aug 2020 11:31:57 +0800 Subject: [PATCH] [MS][LITE][Develop]Fp16 conv1x1 bug --- mindspore/lite/nnacl/fp16/matmul_fp16.c | 26 ++++++++++--- mindspore/lite/nnacl/fp16/matmul_fp16.h | 2 +- .../kernel/arm/fp16/convolution_1x1_fp16.cc | 38 ++++++++++++------- .../kernel/arm/fp16/convolution_1x1_fp16.h | 6 +-- .../kernel/arm/fp16/convolution_base_fp16.cc | 9 +++++ .../kernel/arm/fp16/convolution_base_fp16.h | 2 +- .../kernel/arm/fp16/convolution_fp16.cc | 3 +- 7 files changed, 59 insertions(+), 27 deletions(-) diff --git a/mindspore/lite/nnacl/fp16/matmul_fp16.c b/mindspore/lite/nnacl/fp16/matmul_fp16.c index 7d0b785fd2..3181feb978 100644 --- a/mindspore/lite/nnacl/fp16/matmul_fp16.c +++ b/mindspore/lite/nnacl/fp16/matmul_fp16.c @@ -15,14 +15,28 @@ */ #include "nnacl/fp16/matmul_fp16.h" -void ColMajor2Row8MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col) { - for (int r = 0; r < row; r++) { - for (int c = 0; c < col; c++) { - int cd8 = c / 8; - int cm8 = c % 8; - dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src_ptr[c * row + r]; + +void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16) { + if (src_float16) { + float16_t *src = (float16_t *)src_ptr; + for (int r = 0; r < row; r++) { + for (int c = 0; c < col; c++) { + int cd8 = c / 8; + int cm8 = c % 8; + dst_ptr[cd8 * 8 * row + r * 8 + cm8] = (float16_t)(src[c * row + r]); + } + } + } else { + float *src = (float *)src_ptr; + for (int r = 0; r < row; r++) { + for (int c = 0; c < col; c++) { + int cd8 = c / 8; + int cm8 = c % 8; + dst_ptr[cd8 * 8 * row + r * 8 + cm8] = (float16_t)(src[c * row + r]); + } } } + return; } void MatMul16x8(const float16_t *a, const float16_t *b, float16_t *dst, const float16_t *bias, ActType act_type, diff --git a/mindspore/lite/nnacl/fp16/matmul_fp16.h b/mindspore/lite/nnacl/fp16/matmul_fp16.h index fae70ba61d..0f9212cae8 100644 --- a/mindspore/lite/nnacl/fp16/matmul_fp16.h +++ b/mindspore/lite/nnacl/fp16/matmul_fp16.h @@ -32,7 +32,7 @@ extern "C" { void MatMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, ActType act_type, int depth, int row, int col, int stride, bool write_nhwc); -void ColMajor2Row8MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col); +void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16); void RowMajor2Col16MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index 7f56320630..1bd9c81f36 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -74,31 +74,36 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() { } int Convolution1x1FP16CPUKernel::InitWeightBias() { - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute filter failed."; - return ret; - } + auto bias_tensor = in_tensors_.at(kBiasIndex); + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = weight_tensor->Channel(); + auto output_channel = weight_tensor->Batch(); - bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float16_t)); + size_t size = UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); + bias_data_ = malloc(size); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; return RET_ERROR; } - memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float16_t)); + memset(bias_data_, 0, size); if (in_tensors_.size() == 3) { - Float32ToFloat16(reinterpret_cast(in_tensors_[2]->Data()), reinterpret_cast(bias_data_), - conv_param_->output_channel_); + if (bias_tensor->data_type() == kNumberTypeFloat16) { + memcpy(bias_data_, bias_tensor->Data(), output_channel * sizeof(float16_t)); + } else { + Float32ToFloat16(reinterpret_cast(bias_tensor->Data()), reinterpret_cast(bias_data_), + output_channel); + } } - weight_ptr_ = reinterpret_cast(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t))); + size = input_channel * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); + weight_ptr_ = reinterpret_cast(malloc(size)); if (weight_ptr_ == nullptr) { MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!"; return RET_ERROR; } - memset(weight_ptr_, 0, matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t)); - ColMajor2Row8MajorFp16(reinterpret_cast(execute_weight_), weight_ptr_, matmul_param_->deep_, - matmul_param_->col_); + memset(weight_ptr_, 0, size); + ColMajor2Row8MajorFp16(weight_tensor->Data(), weight_ptr_, input_channel, output_channel, + weight_tensor->data_type() == kNumberTypeFloat16); return RET_OK; } @@ -106,6 +111,13 @@ int Convolution1x1FP16CPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; } + + matmul_param_ = new (std::nothrow) MatMulParameter(); + if (matmul_param_ == nullptr) { + MS_LOG(ERROR) << "Init matmul_param_ failed."; + return RET_ERROR; + } + int ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h index b2a43426b6..61133c0486 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h @@ -31,9 +31,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const Context *ctx, const mindspore::lite::PrimitiveC *primitive) - : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) { - matmul_param_ = new MatMulParameter(); - } + : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~Convolution1x1FP16CPUKernel() override; int Init() override; @@ -50,7 +48,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { private: bool pre_trans_input_ = false; - int thread_count_ = 0; + int thread_count_ = 1; int thread_stride_ = 0; float16_t *weight_ptr_ = nullptr; float16_t *input_ptr_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc index dcc110d068..fc264c1737 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc @@ -23,6 +23,14 @@ #include "src/runtime/runtime_api.h" namespace mindspore::kernel { + +ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() { + if (fp16_weight_ != nullptr) { + free(fp16_weight_); + fp16_weight_ = nullptr; + } +} + int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() { // ===================input====================// auto input_tensor = in_tensors_.at(kInputIndex); @@ -65,6 +73,7 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() { } else { auto *origin_weight = reinterpret_cast(in_tensors_.at(kWeightIndex)->Data()); execute_weight_ = origin_weight; + fp16_weight_ = nullptr; } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h index e507ab3d3e..5029c342c8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h @@ -30,7 +30,7 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { const std::vector &outputs, const Context *ctx, const mindspore::lite::PrimitiveC *primitive) : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ConvolutionBaseFP16CPUKernel() override = default; + ~ConvolutionBaseFP16CPUKernel() override; int Init() override { return RET_OK; } int ReSize() override { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index e4c1429b20..8a6f3baf05 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -244,8 +244,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector