diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc
index 100ae61a0d..3de4d792f9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc
@@ -30,9 +30,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_MatMul;
 
 namespace mindspore::kernel {
-MatmulFP16CPUKernel::~MatmulFP16CPUKernel() { FreeTmpBuffer(); }
-
-void MatmulFP16CPUKernel::FreeTmpBuffer() {
+MatmulFP16CPUKernel::~MatmulFP16CPUKernel() {
   if (a_pack_ptr_ != nullptr) {
     free(a_pack_ptr_);
     a_pack_ptr_ = nullptr;
@@ -47,6 +45,17 @@ void MatmulFP16CPUKernel::FreeTmpBuffer() {
   }
 }
 
+void MatmulFP16CPUKernel::FreeTmpBuffer() {
+  if (a_pack_ptr_ != nullptr) {
+    params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_);
+    a_pack_ptr_ = nullptr;
+  }
+  if (b_pack_ptr_ != nullptr) {
+    params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_);
+    b_pack_ptr_ = nullptr;
+  }
+}
+
 int MatmulFP16CPUKernel::MallocMatrixABuffer() {
   auto a_shape = in_tensors_[0]->shape();
   int batch = 1;
@@ -57,9 +66,13 @@ int MatmulFP16CPUKernel::MallocMatrixABuffer() {
   params_->row_ = params_->a_transpose_ ? a_shape[a_shape.size() - 1] : a_shape[a_shape.size() - 2];
   params_->deep_ = params_->a_transpose_ ? a_shape[a_shape.size() - 2] : a_shape[a_shape.size() - 1];
   params_->row_16_ = UP_ROUND(params_->row_, C16NUM);
-
-  a_pack_ptr_ =
-    reinterpret_cast<float16_t *>(malloc(params_->batch * params_->row_16_ * params_->deep_ * sizeof(float16_t)));
+  if (params_->a_const_) {
+    a_pack_ptr_ =
+      reinterpret_cast<float16_t *>(malloc(params_->batch * params_->row_16_ * params_->deep_ * sizeof(float16_t)));
+  } else {
+    a_pack_ptr_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(params_->batch * params_->row_16_ * params_->deep_ * sizeof(float16_t)));
+  }
   if (a_pack_ptr_ == nullptr) {
     FreeTmpBuffer();
     return RET_MEMORY_FAILED;
@@ -82,8 +95,13 @@ int MatmulFP16CPUKernel::MallocMatrixBBuffer() {
   params_->col_8_ = UP_ROUND(params_->col_, 8);
   params_->deep_ = params_->b_transpose_ ? b_shape[b_shape.size() - 1] : b_shape[b_shape.size() - 2];
 
-  b_pack_ptr_ =
-    reinterpret_cast<float16_t *>(malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float16_t)));
+  if (params_->b_const_) {
+    b_pack_ptr_ =
+      reinterpret_cast<float16_t *>(malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float16_t)));
+  } else {
+    b_pack_ptr_ = reinterpret_cast<float16_t *>(
+      context_->allocator->Malloc(params_->batch * params_->col_8_ * params_->deep_ * sizeof(float16_t)));
+  }
   if (b_pack_ptr_ == nullptr) {
     FreeTmpBuffer();
     return RET_MEMORY_FAILED;
@@ -95,59 +113,32 @@ int MatmulFP16CPUKernel::MallocMatrixBBuffer() {
 }
 
 int MatmulFP16CPUKernel::InitBias() {
+  auto b_shape = in_tensors_[1]->shape();
+  auto c_shape = out_tensors_[0]->shape();
+  params_->col_ = params_->b_const_
+                    ? (params_->b_transpose_ ? b_shape[b_shape.size() - 2] : b_shape[b_shape.size() - 1])
+                    : (c_shape[c_shape.size() - 1]);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  bias_ptr_ = reinterpret_cast<float16_t *>(malloc(params_->col_8_ * sizeof(float16_t)));
+  if (bias_ptr_ == nullptr) {
+    FreeTmpBuffer();
+    return RET_MEMORY_FAILED;
+  }
+  memset(bias_ptr_, 0, params_->col_8_ * sizeof(float16_t));
   if (in_tensors_.size() == 3) {
-    auto c_shape = out_tensors_[0]->shape();
-    auto bias_shape = in_tensors_[1]->shape();
-    if (bias_shape[bias_shape.size() - 1] != c_shape[c_shape.size() - 1]) {
-      MS_LOG(ERROR) << "The bias'dimension is not equal with colum";
-      FreeTmpBuffer();
-      return RET_INPUT_TENSOR_ERROR;
-    }
-    auto col = c_shape[c_shape.size() - 1];
-    auto col_8 = UP_ROUND(col, 8);
-    bias_ptr_ = reinterpret_cast<float16_t *>(malloc(col_8 * sizeof(float16_t)));
-    if (bias_ptr_ == nullptr) {
-      FreeTmpBuffer();
-      return RET_MEMORY_FAILED;
-    }
-    memset(bias_ptr_, 0, col_8 * sizeof(float16_t));
-    Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->data_c()), bias_ptr_, col);
+    Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->data_c()), bias_ptr_, params_->col_);
   }
   return RET_OK;
 }
 
 int MatmulFP16CPUKernel::ReSize() {
-  if (params_->a_const_ == false || params_->a_init_shape_ == false) {
-    if (a_pack_ptr_ != nullptr) {
-      free(a_pack_ptr_);
-      a_pack_ptr_ = nullptr;
-    }
-    auto ret = MallocMatrixABuffer();
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix a buffer failed";
-      return RET_ERROR;
-    }
-  }
-  if (params_->b_const_ == false || params_->b_init_shape_ == false) {
-    if (b_pack_ptr_ != nullptr) {
-      free(b_pack_ptr_);
-      b_pack_ptr_ = nullptr;
-    }
-    auto ret = MallocMatrixBBuffer();
+  if (!params_->b_const_) {
+    auto ret = InitBias();
     if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix b buffer failed";
+      MS_LOG(ERROR) << "Matmul fp16 init bias failed";
       return RET_ERROR;
     }
   }
-  if (bias_ptr_ != nullptr) {
-    free(bias_ptr_);
-    bias_ptr_ = nullptr;
-  }
-  auto ret = InitBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Matmul fp16 init bias failed";
-    return RET_ERROR;
-  }
   return RET_OK;
 }
 
@@ -200,47 +191,36 @@ void MatmulFP16CPUKernel::InitMatrixB(float16_t *b_ptr, float16_t *b_pack_ptr) {
 }
 
 int MatmulFP16CPUKernel::Init() {
-  params_->a_init_shape_ = (in_tensors_[0]->shape().size() != 0);
-  params_->b_init_shape_ = (in_tensors_[1]->shape().size() != 0);
-  if (params_->a_init_shape_ == true) {
+  params_->a_const_ = (in_tensors_[0]->data_c() != nullptr);
+  params_->b_const_ = (in_tensors_[1]->data_c() != nullptr);
+  if (params_->a_const_) {
     auto ret = MallocMatrixABuffer();
     if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix a buffer failed";
-      return RET_ERROR;
-    }
-  }
-  if (params_->b_init_shape_ == true) {
-    auto ret = MallocMatrixBBuffer();
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Matmul fp16 malloc matrix b buffer failed";
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix A buffer failed";
       return RET_ERROR;
     }
-  }
-
-  params_->a_const_ = (in_tensors_[0]->data_c() != nullptr);
-  params_->b_const_ = (in_tensors_[1]->data_c() != nullptr);
-  if (params_->a_const_ == true) {
     if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
       InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     } else {
      InitMatrixA(reinterpret_cast<float16_t *>(in_tensors_[0]->data_c()), a_pack_ptr_);
    }
  }
-  if (params_->b_const_ == true) {
+  if (params_->b_const_) {
+    auto ret = MallocMatrixBBuffer();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix B buffer failed";
+      return RET_ERROR;
+    }
    if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
      InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->data_c()), b_pack_ptr_);
    } else {
      InitMatrixB(reinterpret_cast<float16_t *>(in_tensors_[1]->data_c()), b_pack_ptr_);
    }
-  }
-
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  auto ret = InitBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Matmul fp16 init bias failed";
-    return RET_ERROR;
+    ret = InitBias();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 init bias failed";
+      return RET_ERROR;
+    }
  }
  return RET_OK;
 }
@@ -275,7 +255,7 @@ int MatmulFP16Run(void *cdata, int task_id) {
   auto op = reinterpret_cast<MatmulFP16CPUKernel *>(cdata);
   auto error_code = op->RunImpl(task_id);
   if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "MatmulFp32Run error task_id[" << task_id << "] error_code[" << error_code << "]";
+    MS_LOG(ERROR) << "MatmulFp16Run error task_id[" << task_id << "] error_code[" << error_code << "]";
     return RET_ERROR;
   }
   return RET_OK;
@@ -294,14 +274,24 @@ int MatmulFP16CPUKernel::Run() {
   } else {
     c_ptr = reinterpret_cast<float16_t *>(out_tensor->data_c());
   }
-  if (params_->a_const_ == false) {
+  if (!params_->a_const_) {
+    ret = MallocMatrixABuffer();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix A buffer failed";
+      return RET_ERROR;
+    }
     if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
       InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     } else {
       InitMatrixA(reinterpret_cast<float16_t *>(in_tensors_[0]->data_c()), a_pack_ptr_);
     }
   }
-  if (params_->b_const_ == false) {
+  if (!params_->b_const_) {
+    ret = MallocMatrixBBuffer();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 malloc matrix B buffer failed";
+      return RET_ERROR;
+    }
     if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
       InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->data_c()), b_pack_ptr_);
     } else {
@@ -312,7 +302,12 @@ int MatmulFP16CPUKernel::Run() {
     current_a_ = a_pack_ptr_ + i * params_->row_16_ * params_->deep_;
     current_b_ = b_pack_ptr_ + i * params_->deep_ * params_->col_8_;
     current_c_ = c_ptr + i * params_->row_ * params_->col_;
-    ParallelLaunch(this->context_->thread_pool_, MatmulFP16Run, this, thread_count_);
+    ret = ParallelLaunch(this->context_->thread_pool_, MatmulFP16Run, this, thread_count_);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Matmul fp16 run function MatmulFP16Run failed";
+      FreeTmpBuffer();
+      return RET_ERROR;
+    }
   }
   if (out_tensor->data_type() == kNumberTypeFloat32) {
     auto size = out_tensor->ElementsNum();
@@ -320,6 +315,14 @@ int MatmulFP16CPUKernel::Run() {
     Float16ToFloat32(output_ptr_, out_tensor_data, size);
     ctx_->allocator->Free(output_ptr_);
   }
+  if (!params_->a_const_) {
+    context_->allocator->Free(a_pack_ptr_);
+    a_pack_ptr_ = nullptr;
+  }
+  if (!params_->b_const_) {
+    context_->allocator->Free(b_pack_ptr_);
+    b_pack_ptr_ = nullptr;
+  }
   return RET_OK;
 }
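The ownership rule this patch settles on: tensors that are constant at Init() time (a_const_/b_const_) are packed once into malloc'd buffers that live until the destructor, while non-const tensors are packed into buffers borrowed from the context allocator at the start of each Run() and returned at its end, which is why the new FreeTmpBuffer() dispatches on a_const_/b_const_ before choosing between free() and allocator->Free(). Below is a minimal, self-contained sketch of that split; the Allocator and PackedKernel names are invented for illustration and are not MindSpore Lite APIs.

```cpp
// Minimal sketch (not MindSpore code): Allocator stands in for context_->allocator,
// PackedKernel mirrors the const-vs-runtime buffer ownership used in the patch.
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

struct Allocator {  // stand-in for the shared runtime allocator
  void *Malloc(std::size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }
};

class PackedKernel {
 public:
  PackedKernel(Allocator *alloc, bool b_const) : alloc_(alloc), b_const_(b_const) {}
  ~PackedKernel() {
    // Only a buffer the kernel owns for its whole lifetime (const weights) is
    // released here; per-run buffers were already returned inside Run().
    if (b_pack_ != nullptr && b_const_) {
      std::free(b_pack_);
    }
  }

  // Const weights: pack once with malloc and keep the cached result.
  void Init(const std::vector<float> &b) {
    if (b_const_) {
      b_pack_ = static_cast<float *>(std::malloc(b.size() * sizeof(float)));
      std::memcpy(b_pack_, b.data(), b.size() * sizeof(float));
    }
  }

  // Runtime weights: pack per inference from the allocator and return the
  // buffer immediately after use.
  float Run(const std::vector<float> &a, const std::vector<float> &b) {
    if (!b_const_) {
      b_pack_ = static_cast<float *>(alloc_->Malloc(b.size() * sizeof(float)));
      std::memcpy(b_pack_, b.data(), b.size() * sizeof(float));
    }
    float dot = 0.0f;  // a trivial dot product stands in for the real matmul
    for (std::size_t i = 0; i < a.size(); ++i) {
      dot += a[i] * b_pack_[i];
    }
    if (!b_const_) {
      alloc_->Free(b_pack_);
      b_pack_ = nullptr;
    }
    return dot;
  }

 private:
  Allocator *alloc_;
  bool b_const_;
  float *b_pack_ = nullptr;
};

int main() {
  Allocator alloc;
  std::vector<float> a{1, 2, 3}, b{4, 5, 6};

  PackedKernel const_weights(&alloc, /*b_const=*/true);
  const_weights.Init(b);
  std::cout << const_weights.Run(a, b) << std::endl;  // 32, packed once at Init

  PackedKernel runtime_weights(&alloc, /*b_const=*/false);
  runtime_weights.Init(b);  // no-op: nothing to cache for a runtime input
  std::cout << runtime_weights.Run(a, b) << std::endl;  // 32, packed per Run
  return 0;
}
```

The same split explains the rest of the diff: MallocMatrixABuffer/MallocMatrixBBuffer pick malloc or context_->allocator->Malloc based on a_const_/b_const_, Run() allocates and frees the non-const packed buffers around each inference, and the destructor only needs to release the malloc'd const-weight copies.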