Browse Source

!8503 [MSLITE][Develop] rewrite arm cpu op: matmul init

From: @yangruoqi713
Reviewed-by: 
Signed-off-by:
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
3f20cbc73e
1 changed file with 96 additions and 79 deletions
  1. +96
    -79
      mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc

+ 96
- 79
mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc View File

@@ -31,9 +31,7 @@ using mindspore::lite::RET_ERROR;
using mindspore::schema::PrimitiveType_MatMul; using mindspore::schema::PrimitiveType_MatMul;


namespace mindspore::kernel { namespace mindspore::kernel {
MatmulCPUKernel::~MatmulCPUKernel() { FreeTmpBuffer(); }

void MatmulCPUKernel::FreeTmpBuffer() {
MatmulCPUKernel::~MatmulCPUKernel() {
if (a_pack_ptr_ != nullptr) { if (a_pack_ptr_ != nullptr) {
free(a_pack_ptr_); free(a_pack_ptr_);
a_pack_ptr_ = nullptr; a_pack_ptr_ = nullptr;
@@ -48,6 +46,17 @@ void MatmulCPUKernel::FreeTmpBuffer() {
} }
} }


// Releases the packed matrix A and matrix B buffers, if any, and resets
// their pointers to nullptr.
// The deallocator is chosen per buffer from the matching const flag:
// when params_->a_const_ (or b_const_) is set the buffer goes to free(),
// otherwise it is handed back to context_->allocator.
void MatmulCPUKernel::FreeTmpBuffer() {
  if (a_pack_ptr_ != nullptr) {
    if (params_->a_const_) {
      free(a_pack_ptr_);
    } else {
      context_->allocator->Free(a_pack_ptr_);
    }
    a_pack_ptr_ = nullptr;
  }

  if (b_pack_ptr_ != nullptr) {
    if (params_->b_const_) {
      free(b_pack_ptr_);
    } else {
      context_->allocator->Free(b_pack_ptr_);
    }
    b_pack_ptr_ = nullptr;
  }
}

int MatmulCPUKernel::MallocMatrixABuffer() { int MatmulCPUKernel::MallocMatrixABuffer() {
auto a_shape = in_tensors_[0]->shape(); auto a_shape = in_tensors_[0]->shape();
int batch = 1; int batch = 1;
@@ -66,20 +75,28 @@ int MatmulCPUKernel::MallocMatrixABuffer() {
params_->row_12_ = UP_ROUND(params_->row_, C12NUM); params_->row_12_ = UP_ROUND(params_->row_, C12NUM);


#if defined(ENABLE_ARM32) || defined(ENABLE_X86_64_SSE) #if defined(ENABLE_ARM32) || defined(ENABLE_X86_64_SSE)
a_pack_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * params_->row_4_ * params_->deep_ * sizeof(float)));
if (params_->a_const_) {
a_pack_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * params_->row_4_ * params_->deep_ * sizeof(float)));
} else {
a_pack_ptr_ = reinterpret_cast<float *>(
context_->allocator->Malloc(params_->batch * params_->row_4_ * params_->deep_ * sizeof(float)));
}
if (a_pack_ptr_ == nullptr) { if (a_pack_ptr_ == nullptr) {
FreeTmpBuffer(); FreeTmpBuffer();
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
memset(a_pack_ptr_, 0, params_->row_4_ * params_->deep_ * sizeof(float));
#else #else
int row_tmp = is_vector_a_ ? 1 : params_->row_12_; int row_tmp = is_vector_a_ ? 1 : params_->row_12_;
a_pack_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * row_tmp * params_->deep_ * sizeof(float)));
if (params_->a_const_) {
a_pack_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * row_tmp * params_->deep_ * sizeof(float)));
} else {
a_pack_ptr_ =
reinterpret_cast<float *>(context_->allocator->Malloc(params_->batch * row_tmp * params_->deep_ * sizeof(float)));
}
if (a_pack_ptr_ == nullptr) { if (a_pack_ptr_ == nullptr) {
FreeTmpBuffer(); FreeTmpBuffer();
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
memset(a_pack_ptr_, 0, params_->batch * row_tmp * params_->deep_ * sizeof(float));
#endif #endif
return RET_OK; return RET_OK;
} }
@@ -99,12 +116,16 @@ int MatmulCPUKernel::MallocMatrixBBuffer() {
params_->deep_ = params_->b_transpose_ ? b_shape[b_shape.size() - 1] : b_shape[b_shape.size() - 2]; params_->deep_ = params_->b_transpose_ ? b_shape[b_shape.size() - 1] : b_shape[b_shape.size() - 2];


int col_tmp = is_vector_a_ ? params_->col_ : params_->col_8_; int col_tmp = is_vector_a_ ? params_->col_ : params_->col_8_;
b_pack_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * col_tmp * params_->deep_ * sizeof(float)));
if (params_->b_const_) {
b_pack_ptr_ = reinterpret_cast<float *>(malloc(params_->batch * col_tmp * params_->deep_ * sizeof(float)));
} else {
b_pack_ptr_ =
reinterpret_cast<float *>(context_->allocator->Malloc(params_->batch * col_tmp * params_->deep_ * sizeof(float)));
}
if (b_pack_ptr_ == nullptr) { if (b_pack_ptr_ == nullptr) {
FreeTmpBuffer(); FreeTmpBuffer();
return RET_MEMORY_FAILED; return RET_MEMORY_FAILED;
} }
memset(b_pack_ptr_, 0, params_->batch * col_tmp * params_->deep_ * sizeof(float));


thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8)); thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_); thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
@@ -112,59 +133,33 @@ int MatmulCPUKernel::MallocMatrixBBuffer() {
} }


int MatmulCPUKernel::InitBias() { int MatmulCPUKernel::InitBias() {
auto b_shape = in_tensors_[1]->shape();
auto c_shape = out_tensors_[0]->shape();
params_->col_ = params_->b_const_
? (params_->b_transpose_ ? b_shape[b_shape.size() - 2] : b_shape[b_shape.size() - 1])
: (c_shape[c_shape.size() - 1]);
params_->col_8_ = UP_ROUND(params_->col_, 8);
auto col_tmp = is_vector_a_ ? params_->col_ : params_->col_8_;
bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float)));
if (bias_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memset(bias_ptr_, 0, col_tmp * sizeof(float));
if (in_tensors_.size() == 3) { if (in_tensors_.size() == 3) {
auto c_shape = out_tensors_[0]->shape();
auto bias_shape = in_tensors_[1]->shape();
if (bias_shape[bias_shape.size() - 1] != c_shape[c_shape.size() - 1]) {
MS_LOG(ERROR) << "The bias'dimension is not equal with colum";
FreeTmpBuffer();
return RET_INPUT_TENSOR_ERROR;
}
auto col = c_shape[c_shape.size() - 1];
auto col_8 = UP_ROUND(col, 8);
auto col_tmp = is_vector_a_ ? col : col_8;
bias_ptr_ = reinterpret_cast<float *>(malloc(col_tmp * sizeof(float)));
if (bias_ptr_ == nullptr) {
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memcpy(bias_ptr_, in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum() * sizeof(float)); memcpy(bias_ptr_, in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum() * sizeof(float));
} }
return RET_OK; return RET_OK;
} }


int MatmulCPUKernel::ReSize() { int MatmulCPUKernel::ReSize() {
if (params_->a_const_ == false || params_->a_init_shape_ == false) {
if (a_pack_ptr_ != nullptr) {
free(a_pack_ptr_);
a_pack_ptr_ = nullptr;
}
auto ret = MallocMatrixABuffer();
if (!params_->b_const_) {
auto ret = InitBias();
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix a buffer failed";
MS_LOG(ERROR) << "Matmul fp32 init bias failed";
return RET_ERROR; return RET_ERROR;
} }
} }
if (params_->b_const_ == false || params_->b_init_shape_ == false) {
if (b_pack_ptr_ != nullptr) {
free(b_pack_ptr_);
b_pack_ptr_ = nullptr;
}
auto ret = MallocMatrixBBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix b buffer failed";
return RET_ERROR;
}
}
if (bias_ptr_ != nullptr) {
free(bias_ptr_);
bias_ptr_ = nullptr;
}
auto ret = InitBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 init bias failed";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }


@@ -222,40 +217,31 @@ void MatmulCPUKernel::InitMatrixB(float *src_ptr, float *dst_ptr) {
} }


int MatmulCPUKernel::Init() { int MatmulCPUKernel::Init() {
params_->a_init_shape_ = (in_tensors_[0]->shape().size() != 0);
params_->b_init_shape_ = (in_tensors_[1]->shape().size() != 0);
if (params_->a_init_shape_ == true) {
params_->a_const_ = (in_tensors_[0]->data_c() != nullptr);
params_->b_const_ = (in_tensors_[1]->data_c() != nullptr);
if (params_->a_const_) {
auto ret = MallocMatrixABuffer(); auto ret = MallocMatrixABuffer();
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix a buffer failed";
MS_LOG(ERROR) << "Matmul fp32 malloc matrix buffer failed";
return RET_ERROR; return RET_ERROR;
} }
InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()), a_pack_ptr_);
a_ptr_ = a_pack_ptr_;
} }
if (params_->b_init_shape_ == true) {
if (params_->b_const_) {
auto ret = MallocMatrixBBuffer(); auto ret = MallocMatrixBBuffer();
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix b buffer failed";
MS_LOG(ERROR) << "Matmul fp32 malloc matrix B buffer failed";
return RET_ERROR; return RET_ERROR;
} }
}

params_->a_const_ = (in_tensors_[0]->data_c() != nullptr);
params_->b_const_ = (in_tensors_[1]->data_c() != nullptr);
if (params_->a_const_ == true) {
InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()), a_pack_ptr_);
a_ptr_ = a_pack_ptr_;
}
if (params_->b_const_ == true) {
InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->data_c()), b_pack_ptr_); InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->data_c()), b_pack_ptr_);
b_ptr_ = b_pack_ptr_; b_ptr_ = b_pack_ptr_;
}
if (!InferShapeDone()) {
return RET_OK;
}
auto ret = InitBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 init bias failed";
return RET_ERROR;
// init bias
ret = InitBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 init bias failed";
return RET_ERROR;
}
} }
return RET_OK; return RET_OK;
} }
@@ -291,7 +277,16 @@ int MatmulCPUKernel::Run() {
auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c()); auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c());
auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c()); auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c());


if (params_->a_const_ == false || is_train()) {
if (!params_->a_const_ || is_train()) {
if (a_pack_ptr_ != nullptr) {
params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_);
a_pack_ptr_ = nullptr;
}
auto ret = MallocMatrixABuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix a buffer failed";
return RET_ERROR;
}
if (is_vector_a_) { if (is_vector_a_) {
a_ptr_ = a_src; a_ptr_ = a_src;
} else { } else {
@@ -299,7 +294,16 @@ int MatmulCPUKernel::Run() {
a_ptr_ = a_pack_ptr_; a_ptr_ = a_pack_ptr_;
} }
} }
if (params_->b_const_ == false || is_train()) {
if (!params_->b_const_ || is_train()) {
if (b_pack_ptr_ != nullptr) {
params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_);
b_pack_ptr_ = nullptr;
}
auto ret = MallocMatrixBBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 malloc matrix b buffer failed";
return RET_ERROR;
}
if (is_vector_a_ && params_->b_transpose_) { if (is_vector_a_ && params_->b_transpose_) {
b_ptr_ = b_src; b_ptr_ = b_src;
} else { } else {
@@ -318,7 +322,20 @@ int MatmulCPUKernel::Run() {
cur_b_ptr_ = b_ptr_ + i * params_->deep_ * params_->col_8_; cur_b_ptr_ = b_ptr_ + i * params_->deep_ * params_->col_8_;
cur_c_ptr_ = c_src + i * params_->row_ * params_->col_; cur_c_ptr_ = c_src + i * params_->row_ * params_->col_;
} }
ParallelLaunch(this->context_->thread_pool_, MatmulFloatRun, this, thread_count_);
auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulFloatRun, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Matmul fp32 run function MatmulFloatRun failed";
FreeTmpBuffer();
return RET_ERROR;
}
}
if (!params_->a_const_ || is_train()) {
context_->allocator->Free(a_pack_ptr_);
a_pack_ptr_ = nullptr;
}
if (!params_->b_const_ || is_train()) {
context_->allocator->Free(b_pack_ptr_);
b_pack_ptr_ = nullptr;
} }
return RET_OK; return RET_OK;
} }
@@ -326,10 +343,10 @@ int MatmulCPUKernel::Run() {
void MatmulCPUKernel::eval() { void MatmulCPUKernel::eval() {
// Copy weights after training // Copy weights after training
LiteKernel::eval(); LiteKernel::eval();
if (params_->a_const_ == true) {
if (params_->a_const_) {
InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), a_pack_ptr_); InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), a_pack_ptr_);
} }
if (params_->b_const_ == true) {
if (params_->b_const_) {
InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->MutableData()), b_pack_ptr_); InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->MutableData()), b_pack_ptr_);
} }
} }


Loading…
Cancel
Save