Browse Source

[MS][LITE][Develop]conv1x1 prepare optimize

tags/v1.0.0
ling 5 years ago
parent
commit
116f308d0e
2 changed files with 25 additions and 11 deletions
  1. +14
    -0
      mindspore/lite/nnacl/fp32/matmul.c
  2. +11
    -11
      mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc

+ 14
- 0
mindspore/lite/nnacl/fp32/matmul.c View File

@@ -129,9 +129,16 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col)
dst_c[i] = src_c[i * col];
}
}
for (; ci < col4; ci++) {
float *dst_c = dst_r + ci * C12NUM;
for (size_t i = 0; i < C12NUM; i++) {
dst_c[i] = 0;
}
}
src_r += C12NUM * col;
dst_r += C12NUM * col;
}

for (; ri < row; ri++) {
for (size_t i = 0; i < col; i++) {
dst_r[i * C12NUM] = src_r[i];
@@ -139,6 +146,13 @@ void RowMajor2Col12Major(float *src_ptr, float *dst_ptr, size_t row, size_t col)
src_r += col;
dst_r += 1;
}

for (; ri < row12; ri++) {
for (size_t i = 0; i < col; i++) {
dst_r[i * C12NUM] = 0;
}
dst_r += 1;
}
return;
}



+ 11
- 11
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc View File

@@ -35,10 +35,6 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
}

void Convolution1x1CPUKernel::FreeTmpBuffer() {
if (pack_input_ != nullptr) {
free(pack_input_);
pack_input_ = nullptr;
}
if (pre_trans_input_ && input_ptr_ != nullptr) {
free(input_ptr_);
input_ptr_ = nullptr;
@@ -103,13 +99,6 @@ int Convolution1x1CPUKernel::InitConv1x1Param() {
thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(matmul_param_->col_, C8NUM));
thread_stride_ = UP_DIV(UP_DIV(matmul_param_->col_, C8NUM), thread_count_) * C8NUM;

pack_input_ = reinterpret_cast<float *>(malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float)));
if (pack_input_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
return RET_MEMORY_FAILED;
}
memset(pack_input_, 0, matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float));

if (pre_trans_input_) {
input_ptr_ = reinterpret_cast<float *>(malloc(matmul_param_->row_ * matmul_param_->deep_ * sizeof(float)));
if (input_ptr_ == nullptr) {
@@ -179,6 +168,13 @@ int Convolution1x1CPUKernel::Run() {
auto src_in = reinterpret_cast<float *>(in_tensors_[0]->Data());
auto src_out = reinterpret_cast<float *>(out_tensors_[0]->Data());

pack_input_ =
reinterpret_cast<float *>(ctx_->allocator->Malloc(matmul_param_->row_12_ * matmul_param_->deep_ * sizeof(float)));
if (pack_input_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
return RET_MEMORY_FAILED;
}

for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_,
src_out + batch_index * matmul_param_->row_ * matmul_param_->col_);
@@ -190,6 +186,10 @@ int Convolution1x1CPUKernel::Run() {
}
}

if (pack_input_ != nullptr) {
ctx_->allocator->Free(pack_input_);
pack_input_ = nullptr;
}
return RET_OK;
}
} // namespace mindspore::kernel

Loading…
Cancel
Save