From: @sunsuodong
Reviewed-by: @ddwsky, @zhanghaibo5
Signed-off-by: @zhanghaibo5
tags/v1.1.0
@@ -23,7 +23,7 @@ void Calculate_Data(const float *input_data, float *output_data, int num, EluPar
 }
 int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->in_size_; i += parameter->thread_num_) {
+  for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) {
     Calculate_Data(input_data, output_data, i, parameter);
   }
   return NNACL_OK;
@@ -22,7 +22,6 @@
 typedef struct EluParameter {
   OpParameter op_parameter_;
   float alpha_;
-  int thread_num_;
   int in_size_;
 } EluParameter;
@@ -47,7 +47,7 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding
 }
 int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) {
-  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->thread_num) {
+  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
     int ret = CopyData(input_data, ids, output_data, i, parameter);
     if (ret != NNACL_OK) {
       return ret;
@@ -26,7 +26,6 @@ typedef struct EmbeddingLookupParameter {
   int ids_size_;
   int layer_size_;
   int layer_num_;
-  int thread_num;
 } EmbeddingLookupParameter;
 #ifdef __cplusplus
@@ -40,7 +40,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
   }
 }
-int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
@@ -142,19 +142,17 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return ret;
   }
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }
@@ -173,11 +171,19 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-  return RET_OK;
+  FreePackedInputOutput();
+  return ret;
 }
+void ConvolutionDepthwiseSWFp16CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
+}
 } // namespace mindspore::kernel
@@ -45,11 +45,12 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
@@ -53,7 +53,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
   return RET_OK;
 }
-int DeconvolutionDepthwiseFp16CPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
     int C8 = UP_DIV(conv_param_->input_channel_, C8NUM);
@@ -156,19 +156,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get Execute tensor failed.";
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
+    FreePackedInputOutput();
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return ret;
   }
@@ -191,14 +189,22 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
-    context_->allocator->Free(packed_input_);
-    context_->allocator->Free(packed_output_);
   }
   ConvolutionBaseFP16CPUKernel::IfCastOutput();
   ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
+  FreePackedInputOutput();
   return ret;
 }
+void DeconvolutionDepthwiseFp16CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
+    context_->allocator->Free(packed_input_);
+    context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
+  }
+}
 kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
@@ -46,12 +46,13 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
+  int InitPackedInputOutput();
   int InitWeightBias();
   int InitSlideParam();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
@@ -183,7 +183,7 @@ int DeConvolutionFp16CPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]";
     ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeRunBuf();
     return RET_ERROR;
@@ -197,7 +197,7 @@ int DeConvolutionFp16CPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
     if (error_code != RET_OK) {
-      MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
     }
   }
@@ -70,7 +70,7 @@ int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int ConvolutionDepthwiseSWCPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
@@ -134,9 +134,10 @@ int ConvDwSWRun(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWCPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitPackedInputOutput failed.";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -159,16 +160,22 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
   }
-  return RET_OK;
 }
 } // namespace mindspore::kernel
@@ -35,11 +35,12 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
@@ -146,21 +146,20 @@ int ConvolutionCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 ConvParameter *CreateNewConvParameter(ConvParameter *parameter) {
-  auto conv_parameter = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
+  auto conv_parameter = new (std::nothrow) ConvParameter;
   if (conv_parameter == nullptr) {
     MS_LOG(ERROR) << "Malloc new conv parameter failed.";
     return nullptr;
@@ -222,17 +222,16 @@ int ConvolutionWinogradCPUKernel::Run() {
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
+    FreeTmpBuffer();
     return RET_ERROR;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }
   FreeTmpBuffer();
-  return RET_OK;
+  return ret;
 }
 } // namespace mindspore::kernel
@@ -82,7 +82,7 @@ int DeconvolutionDepthwiseCPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int DeconvolutionDepthwiseCPUKernel::InitBuffer() {
+int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
     int IC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
@@ -151,9 +151,10 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     return RET_ERROR;
   }
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
+    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitPackedInputOutput failed.ret: " << ret;
+    FreePackedInputOutput();
     return ret;
   }
@@ -176,16 +177,23 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
   ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
-    return RET_ERROR;
   }
   if (need_align_) {
     PackNHWC4ToNHWCFp32(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
+    packed_input_ = nullptr;
+    packed_output_ = nullptr;
   }
-  return RET_OK;
 }
 kernel::LiteKernel *CpuDeconvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
@@ -36,11 +36,12 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
-  int InitBuffer();
   int InitWeightBias();
   int Execute(int task_id);
  private:
+  int InitPackedInputOutput();
+  void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
   float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
@@ -202,6 +202,7 @@ int DeConvolutionCPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
+    FreeRunBuf();
     return error_code;
   }
@@ -218,6 +219,7 @@ int DeConvolutionCPUKernel::Run() {
     error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
+      FreeRunBuf();
       return error_code;
     }
   }
@@ -390,6 +390,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
   auto ret = InitRunBuf();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InitRunBuf fail!ret: " << ret;
+    FreeRunBuf();
     return ret;
   }
@@ -410,5 +411,4 @@ int DeConvolutionWinogradCPUKernel::Run() {
   FreeRunBuf();
   return RET_OK;
 }
 } // namespace mindspore::kernel
@@ -26,13 +26,9 @@ using mindspore::schema::PrimitiveType_Elu;
 namespace mindspore::kernel {
 int EluCPUKernel::Init() {
-  elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
-  elu_parameter_->thread_num_ = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
   return ReSize();
 }
@@ -42,6 +38,8 @@ int EluCPUKernel::ReSize() {
 }
 int EluCPUKernel::DoExcute(int task_id) {
+  auto input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   Elu(input_addr, output_addr, elu_parameter_, task_id);
   return RET_OK;
 }
@@ -57,10 +55,7 @@ int EluRun(void *cdata, int task_id) {
 }
 int EluCPUKernel::Run() {
-  input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
-  output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, elu_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -72,16 +67,6 @@ kernel::LiteKernel *CpuEluFp32KernelCreator(const std::vector<lite::Tensor *> &i
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                             const lite::InnerContext *ctx, const KernelKey &desc,
                                             const mindspore::lite::PrimitiveC *primitive) {
-  if (parameter == nullptr) {
-    MS_LOG(ERROR) << "parameter is nullptr";
-    return nullptr;
-  }
-  if (ctx == nullptr) {
-    MS_LOG(ERROR) << "ctx is nullptr";
-    free(parameter);
-    return nullptr;
-  }
   MS_ASSERT(desc.type == PrimitiveType_Elu);
   auto *kernel = new (std::nothrow) EluCPUKernel(parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "Create Kernel failed, name: " << parameter->name_;
@@ -24,25 +24,21 @@
 namespace mindspore::kernel {
 class EluCPUKernel : public LiteKernel {
  public:
-  explicit EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                        const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EluCPUKernel() override{};
+  EluCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+               const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+               const mindspore::lite::PrimitiveC *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    elu_parameter_ = reinterpret_cast<EluParameter *>(op_parameter_);
+  }
+  ~EluCPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EluParameter *elu_parameter_ = nullptr;
  private:
-  float *input_addr = nullptr;
-  float *output_addr = nullptr;
+  EluParameter *elu_parameter_ = nullptr;
 };
 } // namespace mindspore::kernel
@@ -26,9 +26,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookup;
 namespace mindspore::kernel {
 int EmbeddingLookupCPUKernel::Init() {
-  embedding_lookup_parameter_ = reinterpret_cast<EmbeddingLookupParameter *>(op_parameter_);
-  embedding_lookup_parameter_->thread_num = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -36,24 +33,24 @@ int EmbeddingLookupCPUKernel::Init() {
 }
 int EmbeddingLookupCPUKernel::ReSize() {
-  embedding_lookup_parameter_->ids_size_ = in_tensors_.back()->ElementsNum();
-  embedding_lookup_parameter_->layer_size_ = 1;
+  param_->ids_size_ = in_tensors_.back()->ElementsNum();
+  param_->layer_size_ = 1;
   auto in_shape = in_tensors_.front()->shape();
   for (size_t i = 1; i < in_shape.size(); ++i) {
-    embedding_lookup_parameter_->layer_size_ *= in_shape[i];
+    param_->layer_size_ *= in_shape[i];
   }
-  embedding_lookup_parameter_->layer_num_ = 0;
+  param_->layer_num_ = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
-    embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
+    param_->layer_num_ += in_tensors_[i]->shape()[0];
   }
   return RET_OK;
 }
 int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
-  int error_code = EmbeddingLookup(input_addr_, ids_addr_, output_addr_, embedding_lookup_parameter_, task_id);
+  auto ids_addr = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
+  int error_code = EmbeddingLookup(input_addr_, ids_addr, output_addr, param_, task_id);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "embedding lookup error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -62,8 +59,8 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
 }
 int EmbeddingLookupRun(void *cdata, int task_id) {
-  auto EmbeddingLookupData = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
-  auto ret = EmbeddingLookupData->DoExcute(task_id);
+  auto kernel = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
+  auto ret = kernel->DoExcute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookupRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -73,39 +70,38 @@ int EmbeddingLookupRun(void *cdata, int task_id) {
 int EmbeddingLookupCPUKernel::Run() {
   MS_ASSERT(context_->allocator != nullptr);
-  input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
-    sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
-  embedding_lookup_parameter_->is_regulated_ =
-    reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
-  if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) {
+  input_addr_ =
+    reinterpret_cast<float *>(context_->allocator->Malloc(sizeof(float) * param_->layer_size_ * param_->layer_num_));
+  param_->is_regulated_ = reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * param_->layer_num_));
+  if (input_addr_ == nullptr || param_->is_regulated_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
-    context_->allocator->Free(input_addr_);
-    context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+    FreeRunBuff();
    return RET_ERROR;
   }
-  for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
-    embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
+  for (int i = 0; i < param_->layer_num_; ++i) {
+    param_->is_regulated_[i] = param_->max_norm_ == 0;
   }
   int dest_loc = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
     auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
-  auto ret =
-    ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num);
-  context_->allocator->Free(input_addr_);
-  context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  FreeRunBuff();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
   }
   return ret;
 }
+void EmbeddingLookupCPUKernel::FreeRunBuff() {
+  context_->allocator->Free(input_addr_);
+  context_->allocator->Free(param_->is_regulated_);
+  input_addr_ = nullptr;
+  param_->is_regulated_ = nullptr;
+}
 kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                         const std::vector<lite::Tensor *> &outputs,
                                                         OpParameter *parameter, const lite::InnerContext *ctx,
@@ -27,30 +27,20 @@ class EmbeddingLookupCPUKernel : public LiteKernel {
   explicit EmbeddingLookupCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                     const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
-  ~EmbeddingLookupCPUKernel() override {
-    if (input_addr_ != nullptr) {
-      free(input_addr_);
-    }
-    if (embedding_lookup_parameter_->is_regulated_ != nullptr) {
-      free(embedding_lookup_parameter_->is_regulated_);
-    }
-  };
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    param_ = reinterpret_cast<EmbeddingLookupParameter *>(parameter);
+  }
+  ~EmbeddingLookupCPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
   int DoExcute(int task_id);
- protected:
-  const lite::InnerContext *ctx_ = nullptr;
-  int thread_count_ = 1;
-  EmbeddingLookupParameter *embedding_lookup_parameter_ = nullptr;
  private:
+  void FreeRunBuff();
+  EmbeddingLookupParameter *param_ = nullptr;
   float *input_addr_ = nullptr;
-  float *output_addr_ = nullptr;
-  int *ids_addr_ = nullptr;
 };
 } // namespace mindspore::kernel
@@ -44,7 +44,9 @@ void FullconnectionCPUKernel::FreeBuf() {
 int FullconnectionCPUKernel::ReSize() {
   FreeBuf();
   int row = 1;
-  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) row *= (out_tensors_[0]->shape())[i];
+  for (size_t i = 0; i < out_tensors_[0]->shape().size() - 1; ++i) {
+    row *= (out_tensors_[0]->shape())[i];
+  }
   fc_param_->row_ = row;
   fc_param_->col_ = out_tensors_[0]->shape().back();
   fc_param_->deep_ = (in_tensors_[1]->shape())[1];
@@ -56,13 +56,12 @@ int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
 }
 int InstanceNormRun(void *cdata, int task_id) {
-  auto InstanceNormData = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
-  auto ret = InstanceNormData->DoInstanceNorm(task_id);
+  auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
+  auto ret = kernel->DoInstanceNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
   }
-  return RET_OK;
+  return ret;
 }
 int InstanceNormCPUKernel::Run() {
@@ -58,8 +58,8 @@ int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
 }
 int LayerNormRun(void *cdata, int task_id) {
-  auto LayerNormData = reinterpret_cast<LayerNormCPUKernel *>(cdata);
-  auto ret = LayerNormData->DoLayerNorm(task_id);
+  auto kernel = reinterpret_cast<LayerNormCPUKernel *>(cdata);
+  auto ret = kernel->DoLayerNorm(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LayerNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -72,7 +72,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
   return RET_OK;
 }
-int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() {
+int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C8NUM != 0) {
     need_align_ = true;
@@ -319,15 +319,10 @@ int ConvDwSWInt8Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
-  auto ret = InitBuffer();
+  auto ret = InitPackedInputOutput();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
-    if (need_align_) {
-      context_->allocator->Free(packed_input_);
-      context_->allocator->Free(packed_output_);
-      packed_input_ = nullptr;
-      packed_output_ = nullptr;
-    }
+    FreePackedInputOutput();
     return ret;
   }
@@ -353,12 +348,17 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
   if (need_align_) {
     PackNHWC8ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
+  }
+  FreePackedInputOutput();
+  return ret;
+}
+void ConvolutionDepthwiseSWInt8CPUKernel::FreePackedInputOutput() {
+  if (need_align_) {
     context_->allocator->Free(packed_input_);
     context_->allocator->Free(packed_output_);
     packed_input_ = nullptr;
     packed_output_ = nullptr;
   }
-  return ret;
 }
 } // namespace mindspore::kernel
@@ -36,10 +36,11 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
   int InitWeightBias();
-  int InitBuffer();
+  int InitPackedInputOutput();
   int Execute(int task_id);
  private:
+  void FreePackedInputOutput();
   int ReinitQuantParam();
   int ReinitFreeBefore();
   void FreeTmpQuant();