From 3bff4fe825ee7f78ee6957ea4553f6d261483a9b Mon Sep 17 00:00:00 2001 From: sunsuodong Date: Thu, 20 Aug 2020 09:14:52 +0800 Subject: [PATCH] move_malloc_to_run --- .../kernel/arm/base/arg_min_max_base.cc | 43 ++++--------- .../kernel/arm/base/arg_min_max_base.h | 9 +-- .../src/runtime/kernel/arm/fp32/argminmax.cc | 1 - .../src/runtime/kernel/arm/fp32/arithmetic.cc | 43 ++++++------- .../src/runtime/kernel/arm/fp32/arithmetic.h | 3 +- .../lite/src/runtime/kernel/arm/fp32/bias.cc | 15 +++-- .../kernel/arm/fp32/embedding_lookup.cc | 55 ++++++---------- .../lite/src/runtime/kernel/arm/fp32/topk.cc | 20 +++--- .../kernel/arm/int8/arithmetic_int8.cc | 62 +++++-------------- .../runtime/kernel/arm/int8/arithmetic_int8.h | 3 +- .../src/runtime/kernel/arm/int8/div_int8.cc | 56 ++++------------- .../runtime/kernel/arm/int8/softmax_int8.cc | 46 ++++++-------- .../runtime/kernel/arm/int8/softmax_int8.h | 3 +- .../src/runtime/kernel/arm/int8/sub_int8.cc | 45 ++++---------- .../src/runtime/kernel/arm/int8/topk_int8.cc | 14 +++-- 15 files changed, 142 insertions(+), 276 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc index 3e0c460505..94e89bba95 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc @@ -61,19 +61,6 @@ int ArgMinMaxBaseCPUKernel::ReSize() { return RET_PARAM_INVALID; } param->topk_ = MSMIN(param->topk_, in_shape[axis]); - if (param->topk_ > 1 || param->keep_dims_) { - if (context_ != nullptr && context_->allocator != nullptr) { - param->arg_elements_ = - reinterpret_cast(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis])); - data_from_allocator_ = true; - } else { - param->arg_elements_ = reinterpret_cast(malloc(sizeof(ArgElement) * in_shape[axis])); - } - if (param->arg_elements_ == nullptr) { - MS_LOG(ERROR) << "malloc memroy fail!"; - return RET_ERROR; - 
} - } ComputeStrides(in_shape.data(), param->in_strides_, in_shape.size()); auto out_shape = out_tensors_.at(0)->shape(); ComputeStrides(out_shape.data(), param->out_strides_, out_shape.size()); @@ -81,28 +68,24 @@ int ArgMinMaxBaseCPUKernel::ReSize() { } int ArgMinMaxBaseCPUKernel::Run() { - auto input = in_tensors_.at(0); - - auto input_data = reinterpret_cast(in_tensors_.at(0)->Data()); + auto input_data = in_tensors_.at(0)->Data(); auto output_data = out_tensors_.at(0)->Data(); - auto shape = input->shape().data(); + auto shape = in_tensors_.at(0)->shape().data(); auto param = reinterpret_cast(op_parameter_); - ArgMinMax(input_data, output_data, reinterpret_cast(shape), param); - return RET_OK; -} - -void ArgMinMaxBaseCPUKernel::FreeTmpMemory() { - auto param = reinterpret_cast(op_parameter_); - if (param->arg_elements_ == nullptr) { - return; - } - if (data_from_allocator_) { - context_->allocator->Free(param->arg_elements_); - } else { - free(param->arg_elements_); + MS_ASSERT(context_->allocator != nullptr); + if (param->topk_ > 1 || param->keep_dims_) { + param->arg_elements_ = + reinterpret_cast(context_->allocator->Malloc(sizeof(ArgElement) * shape[param->axis_])); + if (param->arg_elements_ == nullptr) { + MS_LOG(ERROR) << "malloc memroy fail!"; + return RET_ERROR; + } } + ArgMinMax(input_data, output_data, reinterpret_cast(shape), param); + context_->allocator->Free(param->arg_elements_); param->arg_elements_ = nullptr; + return RET_OK; } kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h index 8301f8401f..0605e405e3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h @@ -26,20 +26,15 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel { ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector 
&inputs, const std::vector &outputs, const lite::Context *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) {} + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - virtual ~ArgMinMaxBaseCPUKernel() { FreeTmpMemory(); } + virtual ~ArgMinMaxBaseCPUKernel() = default; int Init() override; - int ReSize() override; - int Run() override; - void FreeTmpMemory(); - private: - bool data_from_allocator_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc index 191ed47146..64d951eb22 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc @@ -43,7 +43,6 @@ int ArgMinMaxCPUKernel::Init() { } int ArgMinMaxCPUKernel::ReSize() { - ArgMinMaxBaseCPUKernel::FreeTmpMemory(); return ArgMinMaxBaseCPUKernel::ReSize(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc index 4ea008514b..2f56c5d9e9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc @@ -29,19 +29,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Eltwise; namespace mindspore::kernel { -void ArithmeticCPUKernel::FreeTileData() { - if (tile_data0_ != nullptr) { - delete[](tile_data0_); - tile_data0_ = nullptr; - } - if (tile_data1_ != nullptr) { - delete[](tile_data1_); - tile_data1_ = nullptr; - } -} - -ArithmeticCPUKernel::~ArithmeticCPUKernel() { FreeTileData(); } - int ArithmeticCPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; @@ -50,7 +37,6 @@ int ArithmeticCPUKernel::Init() { } int ArithmeticCPUKernel::ReSize() { - FreeTileData(); arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum(); arithmeticParameter_->in_elements_num1_ = 
in_tensors_[1]->ElementsNum(); arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum(); @@ -75,12 +61,6 @@ int ArithmeticCPUKernel::ReSize() { } } } - - if (arithmeticParameter_->broadcasting_) { - tile_data0_ = new float[arithmeticParameter_->out_elements_num_]; - tile_data1_ = new float[arithmeticParameter_->out_elements_num_]; - } - return RET_OK; } @@ -144,14 +124,27 @@ int ArithmeticCPUKernel::Run() { if (arithmeticParameter_->broadcasting_) { auto input_data0 = reinterpret_cast(in_tensors_[0]->Data()); auto input_data1 = reinterpret_cast(in_tensors_[1]->Data()); + auto length = arithmeticParameter_->out_elements_num_ * sizeof(float); + MS_ASSERT(context_->allocator != nullptr); + tile_data0_ = reinterpret_cast(context_->allocator->Malloc(length)); + tile_data1_ = reinterpret_cast(context_->allocator->Malloc(length)); + if (tile_data0_ == nullptr || tile_data1_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(tile_data0_); + context_->allocator->Free(tile_data1_); + return RET_ERROR; + } TileDimensions(input_data0, input_data1, tile_data0_, tile_data1_, arithmeticParameter_); } - int error_code = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "Arithmetic function error error_code[" << error_code << "]"; - return RET_ERROR; + ret = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_); + if (arithmeticParameter_->broadcasting_) { + context_->allocator->Free(tile_data0_); + context_->allocator->Free(tile_data1_); } - return RET_OK; + if (ret != RET_OK) { + MS_LOG(ERROR) << "Arithmetic function error error_code[" << ret << "]"; + } + return ret; } kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h index 7bea43390b..3bff65a5d9 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h @@ -166,7 +166,7 @@ class ArithmeticCPUKernel : public LiteKernel { break; } } - ~ArithmeticCPUKernel() override; + ~ArithmeticCPUKernel() = default; int Init() override; int ReSize() override; @@ -174,7 +174,6 @@ class ArithmeticCPUKernel : public LiteKernel { int DoArithmetic(int task_id); private: - void FreeTileData(); int thread_count_; float *tile_data0_ = nullptr; float *tile_data1_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc index 3a80e27d7f..b007eac2e0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc @@ -51,11 +51,18 @@ int BiasCPUKernel::Run() { auto bias = reinterpret_cast(in_tensors_.at(1)->Data()); auto out = reinterpret_cast(out_tensors_.at(0)->Data()); size_t data_size = in_tensors_.at(0)->ElementsNum(); - auto tile_in = new float[data_size]; - auto tile_bias = new float[data_size]; + MS_ASSERT(context_->allocator != nullptr); + float *tile_in = reinterpret_cast(context_->allocator->Malloc(data_size * sizeof(float))); + float *tile_bias = reinterpret_cast(context_->allocator->Malloc(data_size * sizeof(float))); + if (tile_in == nullptr || tile_bias == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(tile_in); + context_->allocator->Free(tile_bias); + return RET_ERROR; + } BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_); - delete[] tile_in; - delete[] tile_bias; + context_->allocator->Free(tile_in); + context_->allocator->Free(tile_bias); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc index 8c9e20850a..7830c2c576 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc @@ -49,40 +49,6 @@ int EmbeddingLookupCPUKernel::ReSize() { embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0]; } - if (input_addr_ != nullptr) { - free(input_addr_); - } - if (context_ != nullptr && context_->allocator != nullptr) { - input_addr_ = reinterpret_cast(context_->allocator->Malloc( - sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); - } else { - input_addr_ = reinterpret_cast( - malloc(sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); - } - if (input_addr_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed"; - return RET_ERROR; - } - - if (embedding_lookup_parameter_->is_regulated_ != nullptr) { - free(embedding_lookup_parameter_->is_regulated_); - } - if (context_ != nullptr && context_->allocator != nullptr) { - embedding_lookup_parameter_->is_regulated_ = - reinterpret_cast(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_)); - } else { - embedding_lookup_parameter_->is_regulated_ = - reinterpret_cast(malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_)); - } - if (embedding_lookup_parameter_->is_regulated_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed"; - return RET_ERROR; - } - - for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) { - embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0; - } - return RET_OK; } @@ -111,6 +77,22 @@ int EmbeddingLookupCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } + + MS_ASSERT(context_->allocator != nullptr); + input_addr_ = reinterpret_cast(context_->allocator->Malloc( + sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); + embedding_lookup_parameter_->is_regulated_ = + reinterpret_cast(context_->allocator->Malloc(sizeof(bool) 
* embedding_lookup_parameter_->layer_num_)); + if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(input_addr_); + context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); + return RET_ERROR; + } + for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) { + embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0; + } + int dest_loc = 0; for (int i = 0; i < in_tensors_.size() - 1; i++) { auto input_t = reinterpret_cast(in_tensors_.at(i)->Data()); @@ -121,11 +103,12 @@ int EmbeddingLookupCPUKernel::Run() { ids_addr_ = reinterpret_cast(in_tensors_.back()->Data()); auto ret = LiteBackendParallelLaunch(EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); + context_->allocator->Free(input_addr_); + context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); if (ret != RET_OK) { MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]"; - return RET_ERROR; } - return RET_OK; + return ret; } kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc index 6c6be20aa1..39216fb510 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc @@ -34,23 +34,13 @@ int TopKCPUKernel::Init() { } int TopKCPUKernel::ReSize() { - TopkParameter *parameter = reinterpret_cast(op_parameter_); lite::tensor::Tensor *input = in_tensors_.at(0); + TopkParameter *parameter = reinterpret_cast(op_parameter_); parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; parameter->loop_num_ = 1; for (int i = 0; i < input->shape().size() - 1; ++i) { parameter->loop_num_ *= input->shape()[i]; } - - if (parameter->topk_node_list_ != nullptr) { - free(parameter->topk_node_list_); - 
parameter->topk_node_list_ = nullptr; - } - parameter->topk_node_list_ = malloc(sizeof(TopkNode) * parameter->last_dim_size_); - if (parameter->topk_node_list_ == nullptr) { - MS_LOG(ERROR) << "malloc fail."; - return RET_ERROR; - } return RET_OK; } @@ -64,7 +54,15 @@ int TopKCPUKernel::Run() { auto output_data = reinterpret_cast(out_tensors_.at(0)->Data()); auto output_index = reinterpret_cast(out_tensors_.at(1)->Data()); + MS_ASSERT(context_->allocator != nullptr); + TopkParameter *parameter = reinterpret_cast(op_parameter_); + parameter->topk_node_list_ = context_->allocator->Malloc(sizeof(TopkNode) * parameter->last_dim_size_); + if (parameter->topk_node_list_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + return RET_ERROR; + } Topk(input_data, output_data, output_index, reinterpret_cast(op_parameter_)); + context_->allocator->Free(parameter->topk_node_list_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc index 7a215fcd08..21e65d7754 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc @@ -47,32 +47,6 @@ int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata } } // namespace -void ArithmeticInt8CPUKernel::FreeTileData() { - auto param = reinterpret_cast(op_parameter_); - if (!param->broadcasting_) { - return; - } - if (context_->allocator != nullptr) { - if (tile_data0_ != nullptr) { - context_->allocator->Free(tile_data0_); - } - if (tile_data1_ != nullptr) { - context_->allocator->Free(tile_data1_); - } - } else { - if (tile_data0_ != nullptr) { - free(tile_data0_); - } - if (tile_data1_ != nullptr) { - free(tile_data1_); - } - } - tile_data0_ = nullptr; - tile_data1_ = nullptr; -} - -ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() { FreeTileData(); } - int ArithmeticInt8CPUKernel::Init() { switch 
(op_parameter_->type_) { case PrimitiveType_Equal: @@ -121,21 +95,6 @@ int ArithmeticInt8CPUKernel::Init() { } int ArithmeticInt8CPUKernel::ReSize() { - FreeTileData(); - auto data_size = out_tensors_[0]->Size(); - auto param = reinterpret_cast(op_parameter_); - if (param->broadcasting_) { - if (context_->allocator != nullptr) { - tile_data0_ = reinterpret_cast(context_->allocator->Malloc(data_size)); - tile_data1_ = reinterpret_cast(context_->allocator->Malloc(data_size)); - } else { - tile_data0_ = reinterpret_cast(malloc(data_size)); - tile_data1_ = reinterpret_cast(malloc(data_size)); - } - } else { - tile_data0_ = nullptr; - tile_data1_ = nullptr; - } return RET_OK; } @@ -182,14 +141,25 @@ int ArithmeticInt8CPUKernel::Run() { if (param->broadcasting_) { auto input_data0 = reinterpret_cast(in_tensors_[0]->Data()); auto input_data1 = reinterpret_cast(in_tensors_[1]->Data()); + tile_data0_ = reinterpret_cast(context_->allocator->Malloc(out_tensors_[0]->Size())); + tile_data1_ = reinterpret_cast(context_->allocator->Malloc(out_tensors_[0]->Size())); + if (tile_data0_ == nullptr || tile_data1_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(tile_data0_); + context_->allocator->Free(tile_data1_); + return RET_ERROR; + } TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); } - int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code; - return RET_ERROR; + ret = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_); + if (param->broadcasting_) { + context_->allocator->Free(tile_data0_); + context_->allocator->Free(tile_data1_); } - return RET_OK; + if (ret != RET_OK) { + MS_LOG(ERROR) << "Arithmetic launch function fail! 
ret: " << ret; + } + return ret; } kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h index b9d1de8b37..c109cdbd93 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h @@ -32,7 +32,7 @@ class ArithmeticInt8CPUKernel : public LiteKernel { const std::vector &outputs, const lite::Context *ctx, const mindspore::lite::PrimitiveC *primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~ArithmeticInt8CPUKernel(); + ~ArithmeticInt8CPUKernel() = default; int Init() override; int ReSize() override; @@ -40,7 +40,6 @@ class ArithmeticInt8CPUKernel : public LiteKernel { int DoArithmetic(int thread_id); private: - void FreeTileData(); int8_t *tile_data0_; int8_t *tile_data1_; ArithmeticRunInt8 arithmetic_run_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc index 657a8379c3..7a76970ed0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc @@ -59,49 +59,6 @@ int DivInt8CPUKernel::Init() { } int DivInt8CPUKernel::ReSize() { - if (broadcast_) { - if (tile0_data_ != nullptr) { - if (context_ != nullptr && context_->allocator != nullptr) { - context_->allocator->Free(tile0_data_); - } else { - free(tile0_data_); - } - } - if (tile1_data_ != nullptr) { - if (context_ != nullptr && context_->allocator != nullptr) { - context_->allocator->Free(tile1_data_); - } else { - free(tile1_data_); - } - } - - if (context_ != nullptr && context_->allocator != nullptr) { - tile0_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); - tile1_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); - } else { - tile0_data_ = 
static_cast(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); - tile1_data_ = static_cast(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); - } - - if (tile0_data_ == nullptr || tile1_data_ == nullptr) { - if (tile0_data_ != nullptr) { - if (context_ != nullptr && context_->allocator != nullptr) { - context_->allocator->Free(tile0_data_); - } else { - free(tile0_data_); - } - } - if (tile1_data_ != nullptr) { - if (context_ != nullptr && context_->allocator != nullptr) { - context_->allocator->Free(tile1_data_); - } else { - free(tile1_data_); - } - } - MS_LOG(ERROR) << "malloc memroy fail!"; - return RET_ERROR; - } - } return RET_OK; } @@ -154,12 +111,23 @@ int DivInt8CPUKernel::Run() { tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i); tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i); } + tile0_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); + tile1_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); + if (tile0_data_ == nullptr || tile1_data_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(tile0_data_); + context_->allocator->Free(tile1_data_); + return RET_ERROR; + } TileDimensionsUint8(static_cast(in_tensors_.at(0)->Data()), static_cast(in_tensors_.at(1)->Data()), reinterpret_cast(tile0_data_), reinterpret_cast(tile1_data_), &tile_para); } ret = LiteBackendParallelLaunch(DivInt8Run, this, op_parameter_->thread_num_); - + if (broadcast_) { + context_->allocator->Free(tile0_data_); + context_->allocator->Free(tile1_data_); + } if (ret != RET_OK) { MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]"; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc index 446495ca98..18025196f6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc @@ -62,30 +62,8 @@ 
int SoftmaxInt8CPUKernel::Init() { return ReSize(); } -void SoftmaxInt8CPUKernel::FreeTmpBuffer() { - if (exp_data_ != nullptr) { - free(exp_data_); - exp_data_ = nullptr; - } - if (sum_data_ != nullptr) { - free(sum_data_); - sum_data_ = nullptr; - } -} - int SoftmaxInt8CPUKernel::ReSize() { - auto ret = SoftmaxBaseCPUKernel::ReSize(); - if (ret != RET_OK) { - return ret; - } - FreeTmpBuffer(); - exp_data_ = reinterpret_cast(malloc(softmax_param_->element_size_ * sizeof(int))); - int inner_size = 1; - for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) { - inner_size *= softmax_param_->input_shape_[i]; - } - sum_data_ = reinterpret_cast(malloc(inner_size * sizeof(int))); - return RET_OK; + return SoftmaxBaseCPUKernel::ReSize(); } int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) { @@ -132,12 +110,24 @@ int SoftmaxInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return RET_ERROR; } - - int error_code = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "Softmax function error error_code[" << error_code << "]"; + exp_data_ = reinterpret_cast(context_->allocator->Malloc(softmax_param_->element_size_ * sizeof(int))); + int inner_size = 1; + for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) { + inner_size *= softmax_param_->input_shape_[i]; + } + sum_data_ = reinterpret_cast(context_->allocator->Malloc(inner_size * sizeof(int))); + if (exp_data_ == nullptr || sum_data_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(exp_data_); + context_->allocator->Free(sum_data_); return RET_ERROR; } - return RET_OK; + ret = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_); + context_->allocator->Free(exp_data_); + context_->allocator->Free(sum_data_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Softmax function error error_code[" << ret << "]"; + } + return ret; } } // namespace mindspore::kernel diff --git 
a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h index acb7fb895d..1300bdb324 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h @@ -28,7 +28,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { const std::vector &outputs, const lite::Context *ctx, const mindspore::lite::PrimitiveC *primitive) : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~SoftmaxInt8CPUKernel() { FreeTmpBuffer(); } + ~SoftmaxInt8CPUKernel() {} int Init() override; int ReSize() override; @@ -36,7 +36,6 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { int DoSoftmax(int task_id); private: - void FreeTmpBuffer(); int *sum_data_ = nullptr; int *exp_data_ = nullptr; SoftmaxQuantArg quant_params_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc index f52161cb4f..61aaf47c03 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc @@ -81,35 +81,6 @@ int SubInt8CPUKernel::Init() { } int SubInt8CPUKernel::ReSize() { - if (broadcast_) { - if (tile0_data_ != nullptr) { - if (context_ != nullptr && context_->allocator != nullptr) { - context_->allocator->Free(tile0_data_); - } else { - free(tile0_data_); - } - } - if (tile1_data_ != nullptr) { - if (context_ != nullptr && context_->allocator != nullptr) { - context_->allocator->Free(tile1_data_); - } else { - free(tile1_data_); - } - } - - if (context_ != nullptr && context_->allocator != nullptr) { - tile0_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); - tile1_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); - } else { - tile0_data_ = static_cast(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); - tile1_data_ = static_cast(malloc(sizeof(int8_t) * 
out_tensors_.at(0)->Size())); - } - - if (tile0_data_ == nullptr || tile1_data_ == nullptr) { - MS_LOG(ERROR) << "malloc memroy fail!"; - return RET_ERROR; - } - } return RET_OK; } @@ -164,17 +135,27 @@ int SubInt8CPUKernel::Run() { tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i); tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i); } + tile0_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); + tile1_data_ = static_cast(context_->allocator->Malloc(out_tensors_.at(0)->Size())); + if (tile0_data_ == nullptr || tile1_data_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + context_->allocator->Free(tile0_data_); + context_->allocator->Free(tile1_data_); + return RET_ERROR; + } TileDimensionsUint8(static_cast(in_tensors_.at(0)->Data()), static_cast(in_tensors_.at(1)->Data()), reinterpret_cast(tile0_data_), reinterpret_cast(tile1_data_), &tile_para); } ret = LiteBackendParallelLaunch(SubInt8Run, this, op_parameter_->thread_num_); - + if (broadcast_) { + context_->allocator->Free(tile0_data_); + context_->allocator->Free(tile1_data_); + } if (ret != RET_OK) { MS_LOG(ERROR) << "SubInt8Run function error error_code[" << ret << "]"; - return RET_ERROR; } - return RET_OK; + return ret; } kernel::LiteKernel *CpuSubInt8KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc index 191060604a..635d2266dc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc @@ -43,12 +43,6 @@ int TopKInt8CPUKernel::ReSize() { for (int i = 0; i < input->shape().size() - 1; ++i) { parameter->loop_num_ *= input->shape()[i]; } - - parameter->topk_node_list_ = malloc(sizeof(TopkNodeInt8) * parameter->last_dim_size_); - if (parameter->topk_node_list_ == nullptr) { - MS_LOG(ERROR) << "malloc fail."; - return RET_ERROR; - } return RET_OK; } @@ -62,7 +56,15 @@ 
int TopKInt8CPUKernel::Run() { int8_t *output_data = reinterpret_cast(out_tensors_.at(0)->Data()); int32_t *output_index = reinterpret_cast(out_tensors_.at(1)->Data()); + MS_ASSERT(context_->allocator != nullptr); + TopkParameter *parameter = reinterpret_cast(op_parameter_); + parameter->topk_node_list_ = context_->allocator->Malloc(sizeof(TopkNodeInt8) * parameter->last_dim_size_); + if (parameter->topk_node_list_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + return RET_ERROR; + } TopkInt8(input_data, output_data, output_index, reinterpret_cast(op_parameter_)); + context_->allocator->Free(parameter->topk_node_list_); return RET_OK; }