@@ -30,9 +30,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_MatMul;
 
 namespace mindspore::kernel {
-MatmulFP16CPUKernel::~MatmulFP16CPUKernel() { FreeTmpBuffer(); }
-
-void MatmulFP16CPUKernel::FreeTmpBuffer() {
+MatmulFP16CPUKernel::~MatmulFP16CPUKernel() {
   if (a_pack_ptr_ != nullptr) {
     free(a_pack_ptr_);
     a_pack_ptr_ = nullptr;
@@ -47,6 +45,17 @@ void MatmulFP16CPUKernel::FreeTmpBuffer() {
   }
 }
 
+void MatmulFP16CPUKernel::FreeTmpBuffer() {
+  if (a_pack_ptr_ != nullptr) {
+    params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_);
+    a_pack_ptr_ = nullptr;
+  }
+  if (b_pack_ptr_ != nullptr) {
+    params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_);
+    b_pack_ptr_ = nullptr;
+  }
+}
+
 int MatmulFP16CPUKernel::MallocMatrixABuffer() {
   auto a_shape = in_tensors_[0]->shape();
   int batch = 1;
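Note: the hunk above fixes an ownership asymmetry. A pack buffer for a const input is malloc'ed once and lives until the kernel is destroyed, while a buffer for a non-const input now comes from the context's pooled allocator and is handed back at the end of each Run(); FreeTmpBuffer picks the matching release call. A standalone sketch of the same pattern (the Allocator struct below is a stand-in for the lite context allocator, not the real class):

#include <cstddef>
#include <cstdlib>

// Stand-in for the lite context allocator; the real one recycles freed blocks.
struct Allocator {
  void *Malloc(size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }
};

// One packed buffer with the const/non-const ownership rule of the kernel.
struct PackBuffer {
  void *ptr = nullptr;
  bool owns_malloc = false;  // true for const inputs (freed in the destructor)

  void Acquire(Allocator *alloc, size_t bytes, bool is_const) {
    owns_malloc = is_const;
    ptr = is_const ? std::malloc(bytes) : alloc->Malloc(bytes);
  }
  void Release(Allocator *alloc) {
    if (ptr == nullptr) {
      return;
    }
    owns_malloc ? std::free(ptr) : alloc->Free(ptr);  // mirrors FreeTmpBuffer
    ptr = nullptr;
  }
};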
@@ -57,9 +66,13 @@ int MatmulFP16CPUKernel::MallocMatrixABuffer() {
   params_->row_ = params_->a_transpose_ ? a_shape[a_shape.size() - 1] : a_shape[a_shape.size() - 2];
   params_->deep_ = params_->a_transpose_ ? a_shape[a_shape.size() - 2] : a_shape[a_shape.size() - 1];
   params_->row_16_ = UP_ROUND(params_->row_, C16NUM);
-  a_pack_ptr_ =
-    reinterpret_cast<float16_t *>(malloc(params_->batch * params_->row_16_ * params_->deep_ * sizeof(float16_t)));
+  if (params_->a_const_) {
+    a_pack_ptr_ =
+      reinterpret_cast<float16_t *>(malloc(params_->batch * params_->row_16_ * params_->deep_ * sizeof(float16_t)));
+  } else {
+    a_pack_ptr_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(params_->batch * params_->row_16_ * params_->deep_ * sizeof(float16_t)));
+  }
   if (a_pack_ptr_ == nullptr) {
     FreeTmpBuffer();
     return RET_MEMORY_FAILED;
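For reference, UP_ROUND and C16NUM come from nnacl's op_base.h; to my recollection the macro is the usual round-up-to-multiple, so row_16_ is row_ padded to the next 16-row tile that the fp16 pack and matmul routines operate on (assumption: the macro is unchanged in this tree):

// Round-up macro as defined in nnacl's op_base.h (assumed unchanged here).
#define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y))

static_assert(UP_ROUND(37, 16) == 48, "rows pad up to the next 16-row tile");
static_assert(UP_ROUND(48, 16) == 48, "aligned sizes stay unchanged");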
@@ -82,8 +95,13 @@ int MatmulFP16CPUKernel::MallocMatrixBBuffer() {
   params_->col_8_ = UP_ROUND(params_->col_, 8);
   params_->deep_ = params_->b_transpose_ ? b_shape[b_shape.size() - 1] : b_shape[b_shape.size() - 2];
-  b_pack_ptr_ =
-    reinterpret_cast<float16_t *>(malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float16_t)));
+  if (params_->b_const_) {
+    b_pack_ptr_ =
+      reinterpret_cast<float16_t *>(malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float16_t)));
+  } else {
+    b_pack_ptr_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float16_t)));
+  }
   if (b_pack_ptr_ == nullptr) {
     FreeTmpBuffer();
     return RET_MEMORY_FAILED;
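To make the two allocation sizes concrete, a worked example with invented shapes (batch 2, A = 37x20, B = 20x9, no transposes), matching the batch * row_16_ * deep_ and batch * col_8_ * deep_ products above:

#include <cstdio>

int main() {
  const int batch = 2, row = 37, deep = 20, col = 9;  // hypothetical shapes
  const int row_16 = (row + 15) / 16 * 16;            // UP_ROUND(row, C16NUM) = 48
  const int col_8 = (col + 7) / 8 * 8;                // UP_ROUND(col, 8) = 16
  const int fp16_size = 2;                            // sizeof(float16_t)
  std::printf("a_pack: %d bytes\n", batch * row_16 * deep * fp16_size);  // 3840
  std::printf("b_pack: %d bytes\n", batch * col_8 * deep * fp16_size);   // 1280
  return 0;
}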
@@ -95,59 +113,32 @@ int MatmulFP16CPUKernel::MallocMatrixBBuffer() {
 }
 
 int MatmulFP16CPUKernel::InitBias() {
+  auto b_shape = in_tensors_[1]->shape();
+  auto c_shape = out_tensors_[0]->shape();
+  params_->col_ = params_->b_const_
+                    ? (params_->b_transpose_ ? b_shape[b_shape.size() - 2] : b_shape[b_shape.size() - 1])
+                    : (c_shape[c_shape.size() - 1]);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  bias_ptr_ = reinterpret_cast<float16_t *>(malloc(params_->col_8_ * sizeof(float16_t)));
+  if (bias_ptr_ == nullptr) {
+    FreeTmpBuffer();
+    return RET_MEMORY_FAILED;
+  }
+  memset(bias_ptr_, 0, params_->col_8_ * sizeof(float16_t));
   if (in_tensors_.size() == 3) {
-    auto c_shape = out_tensors_[0]->shape();
-    auto bias_shape = in_tensors_[1]->shape();
-    if (bias_shape[bias_shape.size() - 1] != c_shape[c_shape.size() - 1]) {
-      MS_LOG(ERROR) << "The bias'dimension is not equal with colum";
-      FreeTmpBuffer();
-      return RET_INPUT_TENSOR_ERROR;
-    }
-    auto col = c_shape[c_shape.size() - 1];
-    auto col_8 = UP_ROUND(col, 8);
-    bias_ptr_ = reinterpret_cast<float16_t *>(malloc(col_8 * sizeof(float16_t)));
-    if (bias_ptr_ == nullptr) {
-      FreeTmpBuffer();
-      return RET_MEMORY_FAILED;
-    }
-    memset(bias_ptr_, 0, col_8 * sizeof(float16_t));
-    Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->data_c()), bias_ptr_, col);
+    Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->data_c()), bias_ptr_, params_->col_);
   }
   return RET_OK;
 }
 
 int MatmulFP16CPUKernel::ReSize() {
-  if (params_->a_const_ == false || params_->a_init_shape_ == false) {
-    if (a_pack_ptr_ != nullptr) {
-      free(a_pack_ptr_);
-      a_pack_ptr_ = nullptr;
-    }
-    auto ret = MallocMatrixABuffer();
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix a buffer failed";
-      return RET_ERROR;
-    }
-  }
-  if (params_->b_const_ == false || params_->b_init_shape_ == false) {
-    if (b_pack_ptr_ != nullptr) {
-      free(b_pack_ptr_);
-      b_pack_ptr_ = nullptr;
-    }
-    auto ret = MallocMatrixBBuffer();
+  if (!params_->b_const_) {
+    auto ret = InitBias();
     if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix b buffer failed";
+      MS_LOG(ERROR) << "Matmul fp16 init bias failed";
       return RET_ERROR;
     }
   }
-  if (bias_ptr_ != nullptr) {
-    free(bias_ptr_);
-    bias_ptr_ = nullptr;
-  }
-  auto ret = InitBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Matmul fp16 init bias failed";
-    return RET_ERROR;
-  }
   return RET_OK;
 }
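The reworked InitBias always allocates col_8_ fp16 values and zero-fills them, so the tail lanes of the last 8-wide vector add contribute nothing; the cast then only fills the first col_ entries. A reference version of what Float32ToFloat16 computes per element (the actual nnacl routine may be NEON-vectorized; float16_t here assumes an fp16-capable ARM target):

#include <arm_neon.h>  // provides float16_t on ARM targets with fp16 support

// Element-wise reference of the fp32 -> fp16 cast used for the bias.
void Float32ToFloat16Ref(const float *input, float16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = static_cast<float16_t>(input[i]);
  }
}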
@@ -200,47 +191,36 @@ void MatmulFP16CPUKernel::InitMatrixB(float16_t *b_ptr, float16_t *b_pack_ptr) {
 }
 
 int MatmulFP16CPUKernel::Init() {
-  params_->a_init_shape_ = (in_tensors_[0]->shape().size() != 0);
-  params_->b_init_shape_ = (in_tensors_[1]->shape().size() != 0);
-  if (params_->a_init_shape_ == true) {
+  params_->a_const_ = (in_tensors_[0]->data_c() != nullptr);
+  params_->b_const_ = (in_tensors_[1]->data_c() != nullptr);
+  if (params_->a_const_) {
     auto ret = MallocMatrixABuffer();
     if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix a buffer failed";
-      return RET_ERROR;
-    }
-  }
-  if (params_->b_init_shape_ == true) {
-    auto ret = MallocMatrixBBuffer();
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix b buffer failed";
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix A buffer failed";
       return RET_ERROR;
     }
-  }
-  params_->a_const_ = (in_tensors_[0]->data_c() != nullptr);
-  params_->b_const_ = (in_tensors_[1]->data_c() != nullptr);
-  if (params_->a_const_ == true) {
     if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
       InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     } else {
       InitMatrixA(reinterpret_cast<float16_t *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     }
   }
-  if (params_->b_const_ == true) {
+  if (params_->b_const_) {
+    auto ret = MallocMatrixBBuffer();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix B buffer failed";
+      return RET_ERROR;
+    }
     if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
       InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->data_c()), b_pack_ptr_);
     } else {
       InitMatrixB(reinterpret_cast<float16_t *>(in_tensors_[1]->data_c()), b_pack_ptr_);
     }
-  }
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  auto ret = InitBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Matmul fp16 init bias failed";
-    return RET_ERROR;
+    ret = InitBias();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 init bias failed";
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
@@ -275,7 +255,7 @@ int MatmulFP16Run(void *cdata, int task_id) {
   auto op = reinterpret_cast<MatmulFP16CPUKernel *>(cdata);
   auto error_code = op->RunImpl(task_id);
   if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "MatmulFp32Run error task_id[" << task_id << "] error_code[" << error_code << "]";
+    MS_LOG(ERROR) << "MatmulFp16Run error task_id[" << task_id << "] error_code[" << error_code << "]";
     return RET_ERROR;
   }
   return RET_OK;
@@ -294,14 +274,24 @@ int MatmulFP16CPUKernel::Run() {
   } else {
     c_ptr = reinterpret_cast<float16_t *>(out_tensor->data_c());
   }
-  if (params_->a_const_ == false) {
+  if (!params_->a_const_) {
+    ret = MallocMatrixABuffer();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix A buffer failed";
+      return RET_ERROR;
+    }
     if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
       InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     } else {
       InitMatrixA(reinterpret_cast<float16_t *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     }
   }
-  if (params_->b_const_ == false) {
+  if (!params_->b_const_) {
+    ret = MallocMatrixBBuffer();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix B buffer failed";
+      return RET_ERROR;
+    }
     if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
       InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->data_c()), b_pack_ptr_);
     } else {
@@ -312,7 +302,12 @@ int MatmulFP16CPUKernel::Run() {
     current_a_ = a_pack_ptr_ + i * params_->row_16_ * params_->deep_;
     current_b_ = b_pack_ptr_ + i * params_->deep_ * params_->col_8_;
     current_c_ = c_ptr + i * params_->row_ * params_->col_;
-    ParallelLaunch(this->context_->thread_pool_, MatmulFP16Run, this, thread_count_);
+    ret = ParallelLaunch(this->context_->thread_pool_, MatmulFP16Run, this, thread_count_);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 run function MatmulFP16Run failed";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
   }
   if (out_tensor->data_type() == kNumberTypeFloat32) {
     auto size = out_tensor->ElementsNum();
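Note: each iteration of the loop above advances by a whole padded plane per batch, while C uses the unpadded row_ * col_ plane; a small sketch of the offset arithmetic (element counts, field names mirroring the kernel):

#include <cstddef>

// Per-batch offsets into the packed fp16 buffers (in elements, not bytes).
struct MatmulOffsets {
  size_t row_16, deep, col_8, row, col;
  size_t A(size_t i) const { return i * row_16 * deep; }  // padded A plane
  size_t B(size_t i) const { return i * deep * col_8; }   // padded B plane
  size_t C(size_t i) const { return i * row * col; }      // output is unpadded
};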
@@ -320,6 +315,14 @@
     Float16ToFloat32(output_ptr_, out_tensor_data, size);
     ctx_->allocator->Free(output_ptr_);
   }
+  if (!params_->a_const_) {
+    context_->allocator->Free(a_pack_ptr_);
+    a_pack_ptr_ = nullptr;
+  }
+  if (!params_->b_const_) {
+    context_->allocator->Free(b_pack_ptr_);
+    b_pack_ptr_ = nullptr;
+  }
   return RET_OK;
 }