diff --git a/mindspore/lite/nnacl/fp32/elu_fp32.c b/mindspore/lite/nnacl/fp32/elu_fp32.c
index 87a3e27e22..aed9283eec 100644
--- a/mindspore/lite/nnacl/fp32/elu_fp32.c
+++ b/mindspore/lite/nnacl/fp32/elu_fp32.c
@@ -23,7 +23,7 @@ void Calculate_Data(const float *input_data, float *output_data, int num, EluPar
 }
 
 int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->in_size_; i += parameter->thread_num_) {
+  for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) {
     Calculate_Data(input_data, output_data, i, parameter);
   }
   return NNACL_OK;
diff --git a/mindspore/lite/nnacl/fp32/elu_fp32.h b/mindspore/lite/nnacl/fp32/elu_fp32.h
index d60826135f..2a941ee7c5 100644
--- a/mindspore/lite/nnacl/fp32/elu_fp32.h
+++ b/mindspore/lite/nnacl/fp32/elu_fp32.h
@@ -22,7 +22,6 @@
 typedef struct EluParameter {
   OpParameter op_parameter_;
   float alpha_;
-  int thread_num_;
   int in_size_;
 } EluParameter;
 
diff --git a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c
index 858700b856..7abb2948fa 100644
--- a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c
+++ b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c
@@ -47,7 +47,7 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding
 }
 
 int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->thread_num) {
+  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
     int ret = CopyData(input_data, ids, output_data, i, parameter);
     if (ret != NNACL_OK) {
       return ret;
diff --git a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h
index 0eeda37185..e984ee5c84 100644
--- a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h
+++ b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h
@@ -26,7 +26,6 @@ typedef struct EmbeddingLookupParameter {
   int ids_size_;
   int layer_size_;
   int layer_num_;
-  int thread_num;
 } EmbeddingLookupParameter;
 
 #ifdef __cplusplus
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
index 3844464cd9..0de96878ec 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@@ -40,7 +40,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
   }
 }
 
-int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
@@ -142,19 +142,17 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) {
 }
 
 int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return ret;
   }
 
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }
@@ -173,11 +171,19 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-  return RET_OK;
+  FreePackedInputOutput();
+  return ret;
+}
+
+void ConvolutionDepthwiseSWFp16CPUKernel::FreePackedInputOutput() {  // releases the per-Run packing buffers
+  if (need_align_) {  // set to true by InitPackedInputOutput when input_channel_ % C8NUM != 0
+    context_->allocator->Free(packed_input_);
+    packed_input_ = nullptr;
+    context_->allocator->Free(packed_output_);
+    packed_output_ = nullptr;
+  }
+}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
index 15e1169ed5..7f44731930 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
@@ -45,11 +45,12 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
 
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int Execute(int task_id);
 
  private:
+  void FreePackedInputOutput();  // when need_align_ is set: frees packed_input_/packed_output_ and nulls them
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
index 44c97598f3..54bceb7514 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
@@ -53,7 +53,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
   return RET_OK;
 }
 
-int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
@@ -156,19 +156,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
 
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }
@@ -191,14 +189,22 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
+  FreePackedInputOutput();
   return ret;
 }
 
+void DeconvolutionDepthwiseFp16CPUKernel::FreePackedInputOutput() {  // releases the per-Run packing buffers
+  if (need_align_) {  // set to true by InitPackedInputOutput when input_channel_ % C8NUM != 0
+    context_->allocator->Free(packed_input_);
+    packed_input_ = nullptr;
+    context_->allocator->Free(packed_output_);
+    packed_output_ = nullptr;
+  }
+}
+
 kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
index f301fd0a40..71f81d5e98 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
@@ -46,12 +46,13 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
 
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int InitSlideParam();
   int Execute(int task_id);
 
  private:
+  void FreePackedInputOutput();  // when need_align_ is set: frees packed_input_/packed_output_ and nulls them
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
index 5ed33e0d18..e886b3da71 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
@@ -183,7 +183,7 @@ int DeConvolutionFp16CPUKernel::Run() {
 
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]";
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeRunBuf();
     return RET_ERROR;
@@ -197,7 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() {
 
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
     if (error_code != RET_OK) {
-      MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
     }
   }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
index e15d1a5502..267196f68a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
@@ -70,7 +70,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseSWCPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
@@ -134,9 +134,10 @@ int ConvDwSWRun(void *cdata, int task_id) {
 }
 
 int ConvolutionDepthwiseSWCPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -159,16 +160,22 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
 
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+
+void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() {  // releases the per-Run packing buffers
+  if (need_align_) {  // set to true by InitPackedInputOutput when input_channel_ % C4NUM != 0
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;  // reset so a later call never hands a stale pointer to Free
+    packed_output_ = nullptr;
   }
-
-  return RET_OK;
 }
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
index ac40964498..12c8cbc1dc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
@@ -35,11 +35,12 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
 
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();  // when need_align_ is set: frees packed_input_/packed_output_ and nulls them
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
index 3418c404a6..ae12237fba 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
@@ -146,21 +146,20 @@ int ConvolutionCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
 
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 
 ConvParameter *CreateNewConvParameter(ConvParameter *parameter) {
-  auto conv_parameter = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
+  auto conv_parameter = new (std::nothrow) ConvParameter;
   if (conv_parameter == nullptr) {
     MS_LOG(ERROR) << "Malloc new conv parameter failed.";
     return nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
index 121b7797d5..bfb91c49ff 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
@@ -222,17 +222,16 @@ int ConvolutionWinogradCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
 
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }
 
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
index 5af04ba8e7..5897492979 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
@@ -82,7 +82,7 @@ int DeconvolutionDepthwiseCPUKernel::InitWeightBias() {
   return RET_OK;
 }
 
-int DeconvolutionDepthwiseCPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
@@ -151,9 +151,10 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     return RET_ERROR;
   }
 
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
+    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitPackedInputOutput failed.ret: " << ret;
+    FreePackedInputOutput();
     return ret;
   }
 
@@ -176,16 +177,23 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
 
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+
+void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() {  // releases the per-Run packing buffers
+  if (need_align_) {  // set to true by InitPackedInputOutput when input_channel_ % C4NUM != 0
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;  // reset so a later call never hands a stale pointer to Free
+    packed_output_ = nullptr;
   }
-  return RET_OK;
 }
 
 kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
index 350584a2b7..4b48db40f1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
@@ -36,11 +36,12 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
 
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();  // when need_align_ is set: frees packed_input_/packed_output_ and nulls them
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
index 0f027fe238..a8654ddc31 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
@@ -202,6 +202,7 @@ int DeConvolutionCPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    FreeRunBuf();
     return error_code;
   }
 
@@ -218,6 +219,7 @@ int DeConvolutionCPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      FreeRunBuf();
       return error_code;
     }
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
index f6392aaae1..b5aa53bd6e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
@@ -390,6 +390,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
   auto ret = InitRunBuf();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InitRunBuf fail!ret: " << ret;
+    FreeRunBuf();
     return ret;
   }
 
@@ -410,5 +411,4 @@ int DeConvolutionWinogradCPUKernel::Run() {
   FreeRunBuf();
   return RET_OK;
 }
-
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
index 60d99118d4..3b0642f1fe 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
@@ -26,13 +26,9 @@ using mindspore::schema::PrimitiveType_Elu;
 
 namespace mindspore::kernel {
 int EluCPUKernel::Init() {
-  elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
-  elu_parameter_->thread_num_ = thread_count_;
-
   if (!InferShapeDone()) {
     return RET_OK;
   }
-
   return ReSize();
 }
 
@@ -42,6 +38,8 @@ int EluCPUKernel::ReSize() {
 }
 
 int EluCPUKernel::DoExcute(int task_id) {
+  auto input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());  // resolved on every call
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());  // likewise for output
   Elu(input_addr, output_addr, elu_parameter_, task_id);
   return RET_OK;
 }
@@ -57,10 +55,7 @@ int EluRun(void *cdata, int task_id) {
 }
 
 int EluCPUKernel::Run() {
-  input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
-  output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, elu_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -72,16 +67,6 @@ kernel::LiteKernel *CpuEluFp32KernelCreator(const std::vector<lite::Tensor *> &i
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                             const lite::InnerContext *ctx, const KernelKey &desc,
                                             const mindspore::lite::PrimitiveC *primitive) {
-  if (parameter == nullptr) {
-    MS_LOG(ERROR) << "parameter is nullptr";
-    return nullptr;
-  }
-  if (ctx == nullptr) {
-    MS_LOG(ERROR) << "ctx is nullptr";
-    free(parameter);
-    return nullptr;
-  }
-  MS_ASSERT(desc.type == PrimitiveType_Elu);
   auto *kernel = new (std::nothrow) EluCPUKernel(parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "Create Kernel failed, name: " << parameter->name_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h
index ced5262679..a0dfc066d6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.h
@@ -24,25 +24,21 @@
 namespace mindspore::kernel {
 class EluCPUKernel : public LiteKernel {
  public:
-  explicit EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                        const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EluCPUKernel() override{};
+  // Forwards all arguments to LiteKernel and keeps a typed EluParameter view of op_parameter_.
+  EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+               const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+               const mindspore::lite::PrimitiveC *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive),
+        elu_parameter_(reinterpret_cast<EluParameter *>(op_parameter_)) {}
+  ~EluCPUKernel() override = default;  // keep `override`: the compiler then checks the base dtor is virtual
 
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
 
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EluParameter *elu_parameter_ = nullptr;
-
  private:
-  float *input_addr = nullptr;
-  float *output_addr = nullptr;
+  EluParameter *elu_parameter_ = nullptr;  // typed alias of op_parameter_, assigned in the constructor
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
index 4ef2ed84d4..fbadd72bca 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
@@ -26,9 +26,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookup;
 
 namespace mindspore::kernel {
 int EmbeddingLookupCPUKernel::Init() {
-  embedding_lookup_parameter_ = reinterpret_cast<EmbeddingLookupParameter *>(op_parameter_);
-  embedding_lookup_parameter_->thread_num = thread_count_;
-
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -36,24 +33,24 @@ int EmbeddingLookupCPUKernel::Init() {
 }
 
 int EmbeddingLookupCPUKernel::ReSize() {
-  embedding_lookup_parameter_->ids_size_ = in_tensors_.back()->ElementsNum();
-
-  embedding_lookup_parameter_->layer_size_ = 1;
+  param_->ids_size_ = in_tensors_.back()->ElementsNum();  // id count = element count of the last input tensor
+  param_->layer_size_ = 1;  // becomes the product of the first input's dims after dim 0
   auto in_shape = in_tensors_.front()->shape();
   for (size_t i = 1; i < in_shape.size(); ++i) {
-    embedding_lookup_parameter_->layer_size_ *= in_shape[i];
+    param_->layer_size_ *= in_shape[i];
   }
 
-  embedding_lookup_parameter_->layer_num_ = 0;
+  param_->layer_num_ = 0;  // becomes the sum of dim 0 over every input except the last one
   for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
-    embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
+    param_->layer_num_ += in_tensors_[i]->shape()[0];
   }
-
   return RET_OK;
 }
 
 int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
-  int error_code = EmbeddingLookup(input_addr_, ids_addr_, output_addr_, embedding_lookup_parameter_, task_id);
+  auto ids_addr = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
+  int error_code = EmbeddingLookup(input_addr_, ids_addr, output_addr, param_, task_id);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "embedding lookup error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -62,8 +59,8 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
 }
 
 int EmbeddingLookupRun(void *cdata, int task_id) {
-  auto EmbeddingLookupData = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
-  auto ret = EmbeddingLookupData->DoExcute(task_id);
+  auto kernel = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
+  auto ret = kernel->DoExcute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookupRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -73,39 +70,38 @@ int EmbeddingLookupRun(void *cdata, int task_id) {
 
 int EmbeddingLookupCPUKernel::Run() {
   MS_ASSERT(context_->allocator != nullptr);
-  input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
-    sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
-  embedding_lookup_parameter_->is_regulated_ =
-    reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
-  if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) {
+  input_addr_ =  // scratch buffer that receives a copy of every input tensor except the last
+    reinterpret_cast<float *>(context_->allocator->Malloc(sizeof(float) * param_->layer_size_ * param_->layer_num_));
+  param_->is_regulated_ = reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * param_->layer_num_));
+  if (input_addr_ == nullptr || param_->is_regulated_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
-    context_->allocator->Free(input_addr_);
-    context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+    FreeRunBuff();  // passes both pointers to Free; either of them may be null at this point
     return RET_ERROR;
   }
-  for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
-    embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
+  for (int i = 0; i < param_->layer_num_; ++i) {
+    param_->is_regulated_[i] = param_->max_norm_ == 0;  // every flag starts true only when max_norm_ is 0
   }
-
   int dest_loc = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
     auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
-
-  auto ret =
-    ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num);
-  context_->allocator->Free(input_addr_);
-  context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  FreeRunBuff();  // scratch buffers are released before ret is inspected, on success and failure alike
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
   }
   return ret;
 }
 
+void EmbeddingLookupCPUKernel::FreeRunBuff() {  // releases the scratch buffers obtained in Run()
+  context_->allocator->Free(input_addr_);
+  input_addr_ = nullptr;
+  context_->allocator->Free(param_->is_regulated_);
+  param_->is_regulated_ = nullptr;
+}
+
 kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                         const std::vector<lite::Tensor *> &outputs,
                                                         OpParameter *parameter, const lite::InnerContext *ctx,
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h
index edbfe0af49..ad78806765 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.h
@@ -27,30 +27,20 @@ class EmbeddingLookupCPUKernel : public LiteKernel {
   explicit EmbeddingLookupCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                     const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EmbeddingLookupCPUKernel() override {
-    if (input_addr_ != nullptr) {
-      free(input_addr_);
-    }
-    if (embedding_lookup_parameter_->is_regulated_ != nullptr) {
-      free(embedding_lookup_parameter_->is_regulated_);
-    }
-  };
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    param_ = reinterpret_cast<EmbeddingLookupParameter *>(parameter);
+  }
+  ~EmbeddingLookupCPUKernel() override = default;  // nothing to free here: Run() releases its own buffers
 
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
 
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr;
-
  private:
+  void FreeRunBuff();  // frees input_addr_ and param_->is_regulated_, then nulls both
+  EmbeddingLookupParameter *param_ = nullptr;
   float *input_addr_ = nullptr;
-  float *output_addr_ = nullptr;
-  int *ids_addr_ = nullptr;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc
index 5a75e98e2b..c1eed749e7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc
@@ -44,7 +44,9 @@ void FullconnectionCPUKernel::FreeBuf() {
 int FullconnectionCPUKernel::ReSize() {
   FreeBuf();
   int row = 1;
-  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) row *= (out_tensors_[0]->shape())[i];
+  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) {
+    row *= (out_tensors_[0]->shape())[i];
+  }
   fc_param_->row_ = row;
   fc_param_->col_ = out_tensors_[0]->shape().back();
   fc_param_->deep_ = (in_tensors_[1]->shape())[1];
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
index 99a586e3c3..60d48f8b30 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
@@ -56,13 +56,12 @@ int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
 }
 
 int InstanceNormRun(void *cdata, int task_id) {
-  auto InstanceNormData = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
-  auto ret = InstanceNormData->DoInstanceNorm(task_id);
+  auto *instance_norm = reinterpret_cast<InstanceNormCPUKernel *>(cdata);  // cdata carries the kernel instance
+  const int ret = instance_norm->DoInstanceNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
-    return RET_ERROR;
   }
-  return RET_OK;
+  return ret;  // RET_OK on success, otherwise the code reported by DoInstanceNorm
 }
 
 int InstanceNormCPUKernel::Run() {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
index c727970ba3..9492c7951a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
@@ -58,8 +58,8 @@ int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
 }
 
 int LayerNormRun(void *cdata, int task_id) {
-  auto LayerNormData = reinterpret_cast<LayerNormCPUKernel *>(cdata);
-  auto ret = LayerNormData->DoLayerNorm(task_id);
+  auto kernel = reinterpret_cast<LayerNormCPUKernel *>(cdata);
+  auto ret = kernel->DoLayerNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LayerNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
index c396179670..e3abcb6b27 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
@@ -72,7 +72,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
 
@@ -319,15 +319,10 @@ int ConvDwSWInt8Run(void *cdata, int task_id) {
 }
 
 int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
-    if (need_align_) {
-      context_->allocator->Free(packed_input_);
-      context_->allocator->Free(packed_output_);
-      packed_input_ = nullptr;
-      packed_output_ = nullptr;
-    }
+    MS_LOG(ERROR) << "Depthwise int8 InitPackedInputOutput error!";  // name the step that actually failed
+    FreePackedInputOutput();
     return ret;
   }
 
@@ -353,12 +348,20 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
-    packed_input_ = nullptr;
-    packed_output_ = nullptr;
   }
+  FreePackedInputOutput();
   return ret;
 }
+
+// Hands packed_input_ and packed_output_ back to the context allocator and resets both pointers to nullptr.
+// Does nothing unless need_align_ is set.
+void ConvolutionDepthwiseSWInt8CPUKernel::FreePackedInputOutput() {
+  if (!need_align_) {
+    return;
+  }
+  context_->allocator->Free(packed_input_);
+  context_->allocator->Free(packed_output_);
+  packed_input_ = nullptr;
+  packed_output_ = nullptr;
+}
-
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h
index be0b6c95ab..d97dfe8c29 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h
@@ -36,10 +36,11 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
 
   int InitWeightBias();
-  int InitBuffer();
+  int InitPackedInputOutput();
   int Execute(int task_id);
 
  private:
+  void FreePackedInputOutput();
   int ReinitQuantParam();
   int ReinitFreeBefore();
   void FreeTmpQuant();