From 116f308d0e857b5db8786dcfb53d52709bdffd63 Mon Sep 17 00:00:00 2001 From: ling Date: Wed, 26 Aug 2020 17:22:08 +0800 Subject: [PATCH] [MS][LITE][Develop]conv1x1 prepare optimize --- mindspore/lite/nnacl/fp32/matmul.c | 14 ++++++++++++ .../kernel/arm/fp32/convolution_1x1.cc | 22 +++++++++---------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/mindspore/lite/nnacl/fp32/matmul.c b/mindspore/lite/nnacl/fp32/matmul.c index c44daeb2df..91c4691c5a 100644 --- a/mindspore/lite/nnacl/fp32/matmul.c +++ b/mindspore/lite/nnacl/fp32/matmul.c @@ -129,9 +129,16 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) dst_c[i] = src_c[i * col]; } } + for (; ci < col4; ci++) { + float *dst_c = dst_r + ci * C12NUM; + for (size_t i = 0; i < C12NUM; i++) { + dst_c[i] = 0; + } + } src_r += C12NUM * col; dst_r += C12NUM * col; } + for (; ri < row; ri++) { for (size_t i = 0; i < col; i++) { dst_r[i * C12NUM] = src_r[i]; @@ -139,6 +146,13 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) src_r += col; dst_r += 1; } + + for (; ri < row12; ri++) { + for (size_t i = 0; i < col; i++) { + dst_r[i * C12NUM] = 0; + } + dst_r += 1; + } return; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc index 62d7afce11..f4c63ec4b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc @@ -35,10 +35,6 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { } void Convolution1x1CPUKernel::FreeTmpBuffer() { - if (pack_input_ != nullptr) { - free(pack_input_); - pack_input_ = nullptr; - } if (pre_trans_input_ && input_ptr_ != nullptr) { free(input_ptr_); input_ptr_ = nullptr; @@ -103,13 +99,6 @@ int Convolution1x1CPUKernel::InitConv1x1Param() { thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM)); thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM; - pack_input_ = reinterpret_cast(malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float))); - if (pack_input_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; - return RET_MEMORY_FAILED; - } - memset(pack_input_, 0, matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float)); - if (pre_trans_input_) { input_ptr_ = reinterpret_cast(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float))); if (input_ptr_ == nullptr) { @@ -179,6 +168,13 @@ int Convolution1x1CPUKernel::Run() { auto src_in = reinterpret_cast(in_tensors_[0]->Data()); auto src_out = reinterpret_cast(out_tensors_[0]->Data()); + pack_input_ = + reinterpret_cast(ctx_->allocator->Malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float))); + if (pack_input_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; + return RET_MEMORY_FAILED; + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, src_out + batch_index * matmul_param_->row_ * matmul_param_->col_); @@ -190,6 +186,10 @@ int Convolution1x1CPUKernel::Run() { } } + if (pack_input_ != nullptr) { + ctx_->allocator->Free(pack_input_); + pack_input_ = nullptr; + } return RET_OK; } } // namespace mindspore::kernel