!4867 Clean up CMake build warnings.

Merge pull request !4867 from wangshaocong/lite_clean
5 years ago · 6782325bfd
--- a/mindspore/lite/CMakeLists.txt
+++ b/mindspore/lite/CMakeLists.txt
@@ -64,14 +64,25 @@ set(CMAKE_VERBOSE_MAKEFILE on)
 add_compile_definitions(USE_ANDROID_LOG)
 add_compile_definitions(NO_DLIB)
 add_compile_options(-fPIC)
 if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
    string(REPLACE "-g" " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 endif()
 if (NOT PLATFORM_ARM64 AND NOT PLATFORM_ARM32)
    if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
    else ()
        ## enable for binscope for release
        set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_C_FLAGS}")
        set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_CXX_FLAGS}")
        set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
        set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
        string(REPLACE    " -g " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
    endif ()
 endif ()

 if (BUILD_DEVICE)
    add_compile_definitions(BUILD_DEVICE)
 endif()
 endif ()
 if (SUPPORT_TRAIN)
    add_compile_definitions(SUPPORT_TRAIN)
 endif()
@@ -86,17 +97,17 @@ if (SUPPORT_GPU)
    add_definitions(-DMS_OPENCL_PROFILE=false)
    add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200)
    add_compile_definitions(SUPPORT_GPU)
    if(OFFLINE_COMPILE)
    if (OFFLINE_COMPILE)
        add_compile_definitions(PROGRAM_WITH_IL)
    endif()
    endif ()
    include_directories(${TOP_DIR}/third_party/OpenCL-Headers)
    include_directories(${TOP_DIR}/third_party/OpenCL-CLHPP/include)
 endif()
 endif ()

 if (WIN32)
    add_compile_definitions(LITE_EXPORTS)
    add_compile_definitions(BUILDING_DLL)
 endif()
 endif ()

 set(ANF_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/../core/ir/meta_tensor.cc
@@ -110,26 +121,26 @@ if (BUILD_CONVERTER)
        MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
    endif()
    find_package(Python3 3.7 COMPONENTS Interpreter Development)
    if(Python3_FOUND)
    if (Python3_FOUND)
        set(PYTHON_INCLUDE_DIRS "${Python3_INCLUDE_DIRS}")
        set(PYTHON_LIBRARIES "${Python3_LIBRARIES}")
        if (WIN32)
            if (Python3_DIR)
                message("Python3_DIR set already: " ${Python3_DIR})
            else()
            else ()
                string(LENGTH ${PYTHON_LIBRARIES} PYTHON_LIBRARIES_LEN)
                string(LENGTH "libpythonxx.a" Python3_NAME_LEN)
                math(EXPR Python3_DIR_LEN  ${PYTHON_LIBRARIES_LEN}-${Python3_NAME_LEN})
                string(SUBSTRING ${Python3_LIBRARIES} 0 ${Python3_DIR_LEN} Python3_DIR)
                message("Python3_DIR: " ${Python3_DIR})
            endif()
            endif ()
            link_directories(${Python3_DIR})
        endif()
    else()
        endif ()
    else ()
        find_python_package(py_inc py_lib)
        set(PYTHON_INCLUDE_DIRS "${py_inc}")
        set(PYTHON_LIBRARIES "${py_lib}")
    endif()
    endif ()
    include_directories(${PYTHON_INCLUDE_DIRS})
    include(${TOP_DIR}/cmake/external_libs/json.cmake)
    include(${TOP_DIR}/cmake/external_libs/pybind11.cmake)
@@ -137,27 +148,27 @@ if (BUILD_CONVERTER)
    include_directories(${TOP_DIR}/third_party/protobuf/build/include)
    link_directories(${TOP_DIR}/third_party/protobuf/build/lib)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
 endif()
 endif ()

 if (BUILD_DEVICE)
    if (PLATFORM_ARM32 OR PLATFORM_ARM64)
        if (NOT DEFINED ENV{ANDROID_NDK})
            message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile")
        endif()
        endif ()
        add_compile_definitions(ENABLE_ARM)
    endif()
    endif ()
    if (PLATFORM_ARM32)
        add_definitions(-mfloat-abi=softfp -mfpu=neon)
        add_compile_definitions(ENABLE_ARM32)
    endif()
    endif ()
    if (PLATFORM_ARM64)
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
        add_compile_definitions(ENABLE_ARM64)
        if (ENABLE_FP16)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
        endif ()
    endif()
 endif()
    endif ()
 endif ()

 if (BUILD_MINDDATA)
    # opencv
@@ -167,7 +178,7 @@ if (BUILD_MINDDATA)
    # json
    if (NOT BUILD_CONVERTER)
        include(${TOP_DIR}/cmake/external_libs/json.cmake)
    endif()
    endif ()
    # eigen
    include_directories(${TOP_DIR}/third_party/eigen/)
    # jpeg-turbo
@@ -183,7 +194,7 @@ if (BUILD_MINDDATA)

    add_compile_definitions(ENABLE_ANDROID)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
 endif()
 endif ()

 if (BUILD_DEVICE)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
@@ -191,7 +202,7 @@ if (BUILD_DEVICE)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile)
    endif()
 endif()
    endif ()
 endif ()

 include(${TOP_DIR}/cmake/package_lite.cmake)
--- a/mindspore/lite/src/common/common.h
+++ b/mindspore/lite/src/common/common.h
@@ -37,11 +37,6 @@ static constexpr int kNHWCDimNumber = 4;

 static constexpr int TENSOR_MAX_REFCOUNT = 999;

 static const char *DELIM_COLON = ":";
 static const char *DELIM_COMMA = ",";
 static const char *DELIM_SLASH = "/";
 static const char *DELIM_DOUBLE_BACKSLASH = "\\";

 // quantization relative
 static const char QUANTIZED_UINT8[] = "QUANTIZED_UINT8";
 static const char QUANTIZED_INT8[] = "QUANTIZED_INT8";
--- a/mindspore/lite/src/common/file_utils.cc
+++ b/mindspore/lite/src/common/file_utils.cc
@@ -103,7 +103,7 @@ int WriteToBin(const std::string &file_path, void *data, size_t size) {

 int CompareOutputData(float *output_data, float *correct_data, int data_size) {
  float error = 0;
  for (size_t i = 0; i < data_size; i++) {
  for (int i = 0; i < data_size; i++) {
    float abs = fabs(output_data[i] - correct_data[i]);
    if (abs > 0.00001) {
      error += abs;
--- a/mindspore/lite/src/ir/tensor.cc
+++ b/mindspore/lite/src/ir/tensor.cc
@@ -237,7 +237,7 @@ std::string Tensor::ToString() const {
      if (data == nullptr) {
        return "Data of tensor is nullptr";
      } else {
        for (size_t i = 0; i < 40 && i < this->ElementsNum(); i++) {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << data[i];
        }
      }
@@ -247,7 +247,7 @@ std::string Tensor::ToString() const {
      if (data == nullptr) {
        return "Data of tensor is nullptr";
      } else {
        for (size_t i = 0; i < 40 && i < this->ElementsNum(); i++) {
        for (int i = 0; i < 40 && i < this->ElementsNum(); i++) {
          oss << " " << data[i];
        }
      }
--- a/mindspore/lite/src/ir/tensor.h
+++ b/mindspore/lite/src/ir/tensor.h
@@ -187,8 +187,8 @@ class Tensor : public mindspore::tensor::MetaTensor {
 protected:
  void *data_ = nullptr;
  void *device_data_ = nullptr;
  schema::NodeType tensorType;
  schema::Format format_;
  schema::NodeType tensorType;
  size_t refCount = 0;
  std::vector<tensor::QuantArg> quant_params_;
  mindspore::lite::Allocator *allocator_ = nullptr;
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -154,11 +154,11 @@ class LiteKernel {
  KernelKey desc_;
  std::string name_;
  OpParameter *op_parameter_ = nullptr;
  const mindspore::lite::PrimitiveC *primitive_ = nullptr;
  const lite::Context *context_ = nullptr;
  // tensor will free in ~lite_session()
  std::vector<lite::tensor::Tensor *> in_tensors_;
  std::vector<lite::tensor::Tensor *> out_tensors_;
  const mindspore::lite::PrimitiveC *primitive_ = nullptr;
  const lite::Context *context_ = nullptr;
  std::vector<LiteKernel *> in_kernels_;
  std::vector<LiteKernel *> out_kernels_;
  bool train_mode_ = false;
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -66,7 +66,7 @@ int LiteSession::ConvertTensors(const lite::Model *model) {
    }
    auto quant_params = srcTensor->quantParams();
    if (quant_params != nullptr) {
      for (int j = 0; j < quant_params->size(); j++) {
      for (size_t j = 0; j < quant_params->size(); j++) {
        tensor::QuantArg quant_arg{};
        quant_arg.scale = quant_params->Get(j)->scale();
        quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint();
@@ -93,9 +93,7 @@ void LiteSession::InitGraphInputTensors(const lite::Model *model) {
 }

 void LiteSession::InitGraphInputMSTensors(const lite::Model *model) {
  auto meta_graph = model->GetMetaGraph();
  MS_ASSERT(this->input_vec_.empty());
  MS_ASSERT(meta_graph != nullptr);
  for (auto &input_tensor : this->inputs_) {
    MS_ASSERT(input_tensor != nullptr);
    this->input_vec_.emplace_back(new lite::tensor::LiteTensor(input_tensor));
--- a/mindspore/lite/src/ops/fill.cc
+++ b/mindspore/lite/src/ops/fill.cc
@@ -52,10 +52,9 @@ int Fill::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::
  }

  std::vector<int> output_shape;
  for (int i = 0; i < GetDims().size(); i++) {
  for (size_t i = 0; i < GetDims().size(); i++) {
    output_shape.push_back(GetDims()[i]);
  }
 //  (void)output_shape.insert(output_shape.begin(), GetDims().begin(), GetDims().end());
  output->set_shape(output_shape);
  return RET_OK;
 }
--- a/mindspore/lite/src/ops/full_connection.cc
+++ b/mindspore/lite/src/ops/full_connection.cc
@@ -64,7 +64,7 @@ int FullConnection::InferShape(std::vector<lite::tensor::Tensor *> inputs_,
  }
  int new_k = 1;
  if (GetUseAxis()) {
    for (int i = GetAxis(); i < input0->shape().size(); ++i) {
    for (size_t i = GetAxis(); i < input0->shape().size(); ++i) {
      new_k *= input0->shape()[i];
    }
    if (new_k != input1->shape()[1]) {
@@ -86,7 +86,7 @@ int FullConnection::InferShape(std::vector<lite::tensor::Tensor *> inputs_,
    out_shape[GetAxis()] = input1->shape()[0];
  } else {
    int total = 1;
    for (int i = 0; i < input0->shape().size(); ++i) {
    for (size_t i = 0; i < input0->shape().size(); ++i) {
      total *= input0->shape()[i];
    }
    out_shape.resize(2);
--- a/mindspore/lite/src/ops/pad.cc
+++ b/mindspore/lite/src/ops/pad.cc
@@ -43,7 +43,6 @@ void Pad::SetPaddingMode(int padding_mode) {}
 void Pad::SetConstantValue(float constant_value) {}
 #endif
 namespace {
 const size_t kPaddingsSize = 8;
 const size_t kInputRank = 4;
 }  // namespace
 int Pad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
--- a/mindspore/lite/src/ops/reshape.cc
+++ b/mindspore/lite/src/ops/reshape.cc
@@ -145,10 +145,9 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
      }
    }
  } else if (inputs_.size() == kSingleNum) {
    for (int i = 0; i < GetShape().size(); ++i) {
    for (size_t i = 0; i < GetShape().size(); ++i) {
      out_shape.push_back(GetShape()[i]);
    }
 //    std::copy(GetShape().begin(), GetShape().end(), std::back_inserter(out_shape));
  } else {
    MS_LOG(ERROR) << "inputs tensor size invalid.";
    return RET_INFER_ERR;
--- a/mindspore/lite/src/ops/split.cc
+++ b/mindspore/lite/src/ops/split.cc
@@ -75,7 +75,7 @@ int Split::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor:
  int split_dim = GetSplitDim();
  std::vector<int> input_shape = input->shape();
  std::vector<int> size_split;
  for (int i = 0; i < GetSizeSplits().size(); ++i) {
  for (size_t i = 0; i < GetSizeSplits().size(); ++i) {
    size_split.push_back(GetSizeSplits()[i]);
  }
  for (int i = 0; i < number_split; ++i) {
--- a/mindspore/lite/src/ops/tile.cc
+++ b/mindspore/lite/src/ops/tile.cc
@@ -60,10 +60,9 @@ int Tile::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::
  MS_ASSERT(tile_prim != nullptr);
  std::vector<int> out_shape;
  std::vector<int> multiples;
  for (int i = 0; i < GetMultiples().size(); ++i) {
  for (size_t i = 0; i < GetMultiples().size(); ++i) {
    multiples.push_back(GetMultiples()[i]);
  }
 //  std::copy(GetMultiples().begin(), GetMultiples().end(), std::back_inserter(multiples));
  for (size_t i = 0; i < input->shape().size(); ++i) {
    int tmp = input->shape()[i] * multiples[i];
    out_shape.push_back(tmp);
--- a/mindspore/lite/src/ops/transpose.cc
+++ b/mindspore/lite/src/ops/transpose.cc
@@ -59,10 +59,9 @@ int Transpose::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<ten
    return RET_ERROR;
  }
  std::vector<int> perm;
  for (int i = 0; i < GetPerm().size(); i++) {
  for (size_t i = 0; i < GetPerm().size(); i++) {
    perm.push_back(GetPerm()[i]);
  }
 //  perm.insert(perm.begin(), GetPerm().begin(), GetPerm().end());
  std::vector<int> in_shape = input->shape();
  std::vector<int> out_shape;
  out_shape.resize(perm.size());
--- a/mindspore/lite/src/populate_parameter.cc
+++ b/mindspore/lite/src/populate_parameter.cc
@@ -246,7 +246,7 @@ OpParameter *PopulatePreluParameter(const mindspore::lite::PrimitiveC *primitive
  }
  prelu_param->op_parameter_.type_ = primitive->Type();
  auto temp = param->GetSlope();
  for (int i = 0; i < temp.size(); i++) {
  for (size_t i = 0; i < temp.size(); i++) {
    prelu_param->slope_[i] = temp[i];
  }
  return reinterpret_cast<OpParameter *>(prelu_param);
@@ -404,7 +404,6 @@ OpParameter *PopulateConvDwParameter(const mindspore::lite::PrimitiveC *primitiv
  conv_param->stride_h_ = conv_primitive->GetStrideH();
  conv_param->stride_w_ = conv_primitive->GetStrideW();

  auto pad_mode = conv_primitive->GetPadMode();
  auto convdw_lite_primitive = (lite::DepthwiseConv2D *)primitive;
  MS_ASSERT(nullptr != convdw_lite_primitive);
  conv_param->pad_u_ = convdw_lite_primitive->PadUp();
@@ -828,7 +827,7 @@ OpParameter *PopulateTileParameter(const mindspore::lite::PrimitiveC *primitive)
  auto param = dynamic_cast<const mindspore::lite::Tile *>(primitive);
  auto multiples = param->GetMultiples();
  tile_param->in_dim_ = multiples.size();
  for (size_t i = 0; i < tile_param->in_dim_; ++i) {
  for (int i = 0; i < tile_param->in_dim_; ++i) {
    tile_param->multiples_[i] = multiples[i];
  }
  return reinterpret_cast<OpParameter *>(tile_param);
@@ -1231,7 +1230,7 @@ OpParameter *PopulateCropParameter(const mindspore::lite::PrimitiveC *primitive)
  crop_param->op_parameter_.type_ = primitive->Type();
  crop_param->axis_ = param->GetAxis();
  crop_param->offset_size_ = param_offset.size();
  for (int i = 0; i < param_offset.size(); ++i) {
  for (size_t i = 0; i < param_offset.size(); ++i) {
    crop_param->offset_[i] = param_offset[i];
  }
  return reinterpret_cast<OpParameter *>(crop_param);
--- a/mindspore/lite/src/runtime/kernel/arm/base/caffeprelu_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/caffeprelu_base.h
@@ -43,8 +43,8 @@ class CaffePreluBaseCPUKernel : public LiteKernel {
  int Run() override { return 0; }

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  CaffePreluParameter *prelu_param_;
 };
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
@@ -43,9 +43,9 @@ class ConcatBaseCPUKernel : public LiteKernel {
  int Run() override { return 0; }

 protected:
  int thread_count_;
  int axis_;
  const Context *ctx_;
  int thread_count_;
  ConcatParameter *concat_param_ = nullptr;
 };
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
@@ -121,7 +121,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
  uint8_t per_channel = 0b0;
  if (conv_quant_arg_->input_arg_num_ != kPerTensor) {
    int in_channel = conv_param_->input_channel_;
    if (conv_quant_arg_->input_arg_num_ != in_channel) {
    if (static_cast<int>(conv_quant_arg_->input_arg_num_) != in_channel) {
      MS_LOG(ERROR) << "input per channel quant param length is not equal to input channel.";
      return RET_ERROR;
    }
@@ -130,7 +130,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {

  if (conv_quant_arg_->filter_arg_num_ != kPerTensor) {
    int filter_num = conv_param_->output_channel_;
    if (conv_quant_arg_->filter_arg_num_ != filter_num) {
    if (static_cast<int>(conv_quant_arg_->filter_arg_num_) != filter_num) {
      MS_LOG(ERROR) << "weight per channel quant param length is not equal to filter num.";
      return RET_ERROR;
    }
@@ -139,7 +139,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {

  if (conv_quant_arg_->output_arg_num_ != kPerTensor) {
    int out_channel = conv_param_->output_channel_;
    if (conv_quant_arg_->output_arg_num_ != out_channel) {
    if (static_cast<int>(conv_quant_arg_->output_arg_num_) != out_channel) {
      MS_LOG(ERROR) << "output per channel quant param length is not equal to output channel.";
      return RET_ERROR;
    }
@@ -218,11 +218,6 @@ int ConvolutionBaseCPUKernel::SetInputTensorQuantParam() {
    // per channel
    MS_LOG(ERROR) << "Not Support Per Channel for input now.";
    return RET_ERROR;
    //    auto input_quant_arg = input_tensor->GetQuantParams();
    //    for (int i = 0; i < in_arg_num; ++i) {
    //      conv_quant_arg_->input_quant_args_[i].zp_ = input_quant_arg[i].zeroPoint;
    //      conv_quant_arg_->input_quant_args_[i].scale_ = input_quant_arg[i].scale;
    //    }
  }
  return RET_OK;
 }
@@ -236,7 +231,7 @@ int ConvolutionBaseCPUKernel::SetFilterTensorQuantParam() {
    conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale;
  } else {
    auto weight_quant_arg = weight_tensor->GetQuantParams();
    for (int i = 0; i < weight_arg_num; ++i) {
    for (size_t i = 0; i < weight_arg_num; ++i) {
      conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint;
      conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale;
    }
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
@@ -62,11 +62,11 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
  void FreeQuantParam();

 protected:
  int thread_count_;
  int tile_num_;
  void *bias_data_ = nullptr;
  void *nhwc4_input_ = nullptr;
  const Context *ctx_;
  int thread_count_;
  ConvParameter *conv_param_;
  ConvQuantArg *conv_quant_arg_;
  LayoutConvertor convert_func_;
--- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h
@@ -41,9 +41,9 @@ class FullconnectionBaseCPUKernel : public LiteKernel {

 protected:
  MatMulParameter *fc_param_;
  int thread_count_;
  int thread_stride_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
@@ -41,9 +41,9 @@ class MatmulBaseCPUKernel : public LiteKernel {

 protected:
  MatMulParameter *params_;
  int thread_count_;
  int thread_stride_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h
@@ -43,8 +43,8 @@ class PoolingBaseCPUKernel : public LiteKernel {
  void FreeQuantParam();

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  PoolingParameter *pooling_param_;
  QuantArg **pooling_quant_arg_ = nullptr;
 };
--- a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.h
@@ -41,8 +41,8 @@ class PriorBoxCPUKernel : public LiteKernel {
  int PriorBoxImpl(int task_id);

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;

 private:
  std::vector<float> output_;
--- a/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc
@@ -76,7 +76,7 @@ int ReduceBaseCPUKernel::CheckParameters() {
  }

  if (num_axes_ == 0) {
    for (int i = 0; i < input_rank; i++) {
    for (size_t i = 0; i < input_rank; i++) {
      axes_[i] = i;
    }
    num_axes_ = static_cast<int>(input_rank);
--- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
@@ -45,7 +45,7 @@ int SliceBaseCPUKernel::ReSize() {
      param_->begin_[DIMENSION_4D - j] = param_->begin_[i];
      param_->size_[DIMENSION_4D - j] = param_->size_[i];
    }
    for (size_t i = 0; i < DIMENSION_4D - param_->param_length_; i++) {
    for (int i = 0; i < DIMENSION_4D - param_->param_length_; i++) {
      param_->begin_[i] = 0;
      param_->size_[i] = 1;
    }
--- a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h
@@ -37,8 +37,8 @@ class SoftmaxBaseCPUKernel : public LiteKernel {
  int Run() override { return 0; }

 protected:
  int thread_count_;
  const lite::Context *ctx_;
  int thread_count_;
  SoftmaxParameter *softmax_param_;
 };
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/base/split_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/split_base.h
@@ -39,8 +39,8 @@ class SplitBaseCPUKernel : public LiteKernel {
  int Run() override { return 0; }

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  int thread_n_stride_;
  int thread_n_num_;
  int num_unit_;
--- a/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
@@ -41,9 +41,9 @@ class SqueezeBaseCPUKernel : public LiteKernel {
  int Run() override { return 0; }

 protected:
  int thread_count_;
  int *axis_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc
@@ -44,7 +44,7 @@ int AddNCPUKernel::ReSize() { return RET_OK; }

 int AddNCPUKernel::AddNParallelRun(int thread_id) {
  int count_per_thread = UP_DIV(elements_num_, op_parameter_->thread_num_);
  int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread);
  int count = MSMIN(count_per_thread, static_cast<int>(elements_num_ - thread_id * count_per_thread));
  auto stride = count_per_thread * thread_id;
  auto ret = ElementAdd(in1_addr_ + stride, in2_addr_ + stride, out_addr_ + stride, count);
  if (ret != NNACL_OK) {
@@ -64,9 +64,9 @@ int AddNCPUKernel::Run() {
  auto input0_data = reinterpret_cast<float *>(in_tensors_[0]->Data());
  auto input1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto output_data = reinterpret_cast<float *>(out_tensors_[0]->Data());
  if (elements_num_ < op_parameter_->thread_num_) {
  if (static_cast<int>(elements_num_) < op_parameter_->thread_num_) {
    ElementAdd(input0_data, input1_data, output_data, elements_num_);
    for (int i = 2; i < in_tensors_.size(); ++i) {
    for (size_t i = 2; i < in_tensors_.size(); ++i) {
      ElementAdd(reinterpret_cast<float *>(in_tensors_[i]->Data()), output_data, output_data, elements_num_);
    }
    return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
@@ -36,7 +36,7 @@ int ArithmeticSelfCPUKernel::Init() {

 int ArithmeticSelfCPUKernel::ReSize() {
  data_size_ = in_tensors_[0]->ElementsNum();
  thread_sz_count_ = MSMIN(thread_count_, data_size_);
  thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_));
  thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
  return RET_OK;
 }
@@ -52,7 +52,7 @@ int ArithmeticSelfRuns(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }

 int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
  int size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_);
  int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
  if (size <= 0) {
    return RET_OK;
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h
@@ -97,13 +97,13 @@ class ArithmeticSelfCPUKernel : public LiteKernel {
  int DoArithmeticSelf(int task_id);

 private:
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  size_t data_size_;
  ArithmeticSelfParameter *arithmeticSelfParameter_;
  ArithmeticSelfRun arithmeticSelf_run_;
  const Context *ctx_;
  int thread_count_;
  float *in_ptr_;
  float *out_ptr_;
 };
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
@@ -79,7 +79,7 @@ int BatchnormCPUKernel::ReSize() {
  auto n_dim = input_shapes.size();
  batchnorm_param_->channel_ = input_shapes[n_dim - 1];
  batchnorm_param_->unit_ = 1;
  for (int i = 0; i < n_dim - 1; i++) {
  for (size_t i = 0; i < n_dim - 1; i++) {
    batchnorm_param_->unit_ *= input_shapes[i];
  }
  batchnorm_param_->op_parameter_.thread_num_ =
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc
@@ -32,7 +32,7 @@ int BiasCPUKernel::ReSize() {
  auto dims = in_tensors_[0]->shape();
  MS_ASSERT(dims.size() <= 5);
  bias_param_->ndim_ = dims.size();
  for (int i = 0; i < bias_param_->ndim_; i++) {
  for (size_t i = 0; i < bias_param_->ndim_; i++) {
    bias_param_->in_shape0_[i] = dims[i];
    bias_param_->in_shape1_[i] = 1;
    bias_param_->out_shape_[i] = dims[i];
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/caffeprelu.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/caffeprelu.h
@@ -43,8 +43,8 @@ class CaffePReluCPUKernel : public LiteKernel {
  int DoExcute(int task_id);

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  CaffePReluParameter *prelu_param_;

 private:
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
@@ -52,7 +52,7 @@ int CastCPUKernel::ReSize() {
  if (data_num_ == 0) {
    return RET_OK;
  }
  op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, data_num_);
  op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, static_cast<int>(data_num_));
  stride_ = UP_DIV(data_num_, op_parameter_->thread_num_);
  return RET_OK;
 }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc
@@ -41,7 +41,10 @@ int EluCPUKernel::ReSize() {
  return RET_OK;
 }

 int EluCPUKernel::DoExcute(int task_id) { Elu(input_addr, output_addr, elu_parameter_, task_id); }
 int EluCPUKernel::DoExcute(int task_id) {
  Elu(input_addr, output_addr, elu_parameter_, task_id);
  return RET_OK;
 }

 int EluRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto EluData = reinterpret_cast<EluCPUKernel *>(cdata);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.h
@@ -36,8 +36,8 @@ class EluCPUKernel : public LiteKernel {
  int DoExcute(int task_id);

 protected:
  int thread_count_;
  const lite::Context *ctx_;
  int thread_count_;
  EluParameter *elu_parameter_;

 private:
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc
@@ -40,12 +40,12 @@ int EmbeddingLookupCPUKernel::ReSize() {

  embedding_lookup_parameter_->layer_size_ = 1;
  auto in_shape = in_tensors_.front()->shape();
  for (int i = 1; i < in_shape.size(); ++i) {
  for (size_t i = 1; i < in_shape.size(); ++i) {
    embedding_lookup_parameter_->layer_size_ *= in_shape[i];
  }

  embedding_lookup_parameter_->layer_num_ = 0;
  for (int i = 0; i < in_tensors_.size() - 1; ++i) {
  for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
    embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
  }

@@ -94,7 +94,7 @@ int EmbeddingLookupCPUKernel::Run() {
  }

  int dest_loc = 0;
  for (int i = 0; i < in_tensors_.size() - 1; i++) {
  for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
    auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->Data());
    memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
    dest_loc += in_tensors_.at(i)->ElementsNum();
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.h
@@ -43,8 +43,8 @@ class EmbeddingLookupCPUKernel : public LiteKernel {
  int DoExcute(int task_id);

 protected:
  int thread_count_;
  const lite::Context *ctx_;
  int thread_count_;
  EmbeddingLookupParameter *embedding_lookup_parameter_;

 private:
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc
@@ -37,13 +37,13 @@ int ExpandDimsCPUKernel::Init() {

 int ExpandDimsCPUKernel::ReSize() {
  data_size_ = in_tensors_.at(0)->ElementsNum();
  thread_sz_count_ = MSMIN(thread_count_, data_size_);
  thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_));
  thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
  return RET_OK;
 }

 int ExpandDimsCPUKernel::DoExpandDims(int task_id) {
  size_t size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_);
  size_t size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
  if (size == 0) {
    return RET_OK;
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.h
@@ -41,13 +41,13 @@ class ExpandDimsCPUKernel : public LiteKernel {
  int DoExpandDims(int task_id);

 private:
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  size_t data_size_;
  float *in_ptr_;
  float *out_ptr_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.h
@@ -39,13 +39,13 @@ class FillCPUKernel : public LiteKernel {
  int DoFill(int task_id);

 private:
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  int data_size_;
  float src_data_;
  float *out_ptr_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten.cc
@@ -37,7 +37,7 @@ int FlattenCPUKernel::Init() {
 int FlattenCPUKernel::ReSize() {
  auto output_shape = out_tensors_[0]->shape();
  flatten_param_->size = sizeof(float);
  for (int i = 0; i < output_shape.size(); i++) {
  for (size_t i = 0; i < output_shape.size(); i++) {
    flatten_param_->size *= output_shape[i];
  }
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
@@ -98,7 +98,7 @@ int FusedBatchnormCPUKernel::ReSize() {
  auto n_dim = input_shapes.size();
  batchnorm_param_->channel_ = input_shapes[n_dim - 1];
  batchnorm_param_->unit_ = 1;
  for (int i = 0; i < n_dim - 1; i++) {
  for (size_t i = 0; i < n_dim - 1; i++) {
    batchnorm_param_->unit_ *= input_shapes[i];
  }
  batchnorm_param_->op_parameter_.thread_num_ =
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc
@@ -57,7 +57,7 @@ int GatherCPUKernel::DoGather(int task_id) {
  int indices_element_size = indices_tensor->ElementsNum();

  const int limit = in_shape[axis_];
  for (size_t i = 0; i < indices_element_size; ++i) {
  for (int i = 0; i < indices_element_size; ++i) {
    if (indices_ptr[i] >= limit) {
      MS_LOG(ERROR) << " indice data: " << indices_ptr[i] << " is not in [ 0, " << limit - 1 << " ]";
      return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.h
@@ -41,7 +41,6 @@ class GatherNdCPUKernel : public LiteKernel {
  int DoGatherNd(int task_id);

 private:
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  int count_;
@@ -50,6 +49,7 @@ class GatherNdCPUKernel : public LiteKernel {
  float *in_ptr_;
  float *out_ptr_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc
@@ -59,7 +59,7 @@ int MatmulCPUKernel::ReSize() {
    }
  }

  for (int i = 0; i < a_shape.size() - 2; ++i) {
  for (size_t i = 0; i < a_shape.size() - 2; ++i) {
    batch *= a_shape[i];
  }
  params_->batch = batch;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc
@@ -62,7 +62,7 @@ int PadCPUKernel::ReSize() {
    return RET_ERROR;
  }

  for (int i = 0; i < rank; i++) {
  for (size_t i = 0; i < rank; i++) {
    in_[DEFAULT_PAD_NDIMS - rank + i] = input->shape()[i];
    out_[DEFAULT_PAD_NDIMS - rank + i] = output->shape()[i];
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.h
@@ -43,8 +43,8 @@ class PReluCPUKernel : public LiteKernel {
  int DoExcute(int task_id);

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  PReluParameter *prelu_param_;

 private:
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc
@@ -113,7 +113,7 @@ int ReduceCPUKernel::Run() {
  }
  tmp_shape_ = in_tensors_.at(0)->shape();
  src_data_ = static_cast<float *>(in_tensors_.at(0)->Data());
  for (int i = 0; i < data_buffers_.size(); ++i) {
  for (size_t i = 0; i < data_buffers_.size(); ++i) {
    dst_data_ = data_buffers_[i];
    int axis = axes_[i];
    outer_size_ = 1;
@@ -167,8 +167,8 @@ int ReduceCPUKernel::MallocTmpBuffer() {
  for (auto i = 0; i < num_axes_ - 1; i++) {
    int axis = axes_[i];
    size_t size = 1;
    for (auto j = 0; j < input_shape.size(); j++) {
      if (static_cast<size_t>(axis) != j) {
    for (size_t j = 0; j < input_shape.size(); j++) {
      if (axis != static_cast<int>(j)) {
        size *= input_shape[j];
      }
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.h
@@ -36,7 +36,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
                  const mindspore::lite::PrimitiveC *primitive)
      : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {}
  ~ReduceCPUKernel() {
    for (auto i = 0; i < data_buffers_.size(); i++) {
    for (size_t i = 0; i < data_buffers_.size(); i++) {
      float *buffer = data_buffers_[i];
      if (buffer != nullptr) {
        free(buffer);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc
@@ -30,8 +30,8 @@ using mindspore::schema::PrimitiveType_Reverse;
 namespace mindspore::kernel {

 int ReverseCPUKernel::Stride(int index) {
  int i, stride = 1;
  for (i = index + 1; i < in_tensors_[0]->shape().size(); ++i) {
  int stride = 1;
  for (size_t i = index + 1; i < in_tensors_[0]->shape().size(); ++i) {
    stride *= in_tensors_[0]->shape()[i];
  }
  return stride;
@@ -44,7 +44,7 @@ int ReverseCPUKernel::ReSize() {

  auto *param = reinterpret_cast<ReverseParameter *>(op_parameter_);
  auto input_shape = in_tensors_[0]->shape();
  if (param->num_axis_ > input_shape.size()) {
  if (param->num_axis_ > static_cast<int>(input_shape.size())) {
    MS_LOG(ERROR) << "Reverse dims : " << param->num_axis_
                  << "is greater than input shape size :" << input_shape.size();
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.h
@@ -46,7 +46,6 @@ class ReverseCPUKernel : public LiteKernel {
  int DoReverse(int task_id);

 private:
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  int data_size_;
@@ -54,6 +53,7 @@ class ReverseCPUKernel : public LiteKernel {
  int inCount_[REVERSE_STRIDE_MAX_SIZE];
  int outCount_[REVERSE_STRIDE_MAX_SIZE];
  const Context *ctx_;
  int thread_count_;
  int *tmp_ = nullptr;
  float *in_ptr_;
  float *out_ptr_;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence.cc
@@ -45,7 +45,7 @@ int ReverseSequenceCPUKernel::CalcCountPreAxis(const std::vector<int> shape, int
 }
 int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, int axis) {
  int count = 1;
  for (int i = axis + 1; i < shape.size(); ++i) {
  for (size_t i = axis + 1; i < shape.size(); ++i) {
    count *= shape[i];
  }
  return count;
@@ -53,10 +53,8 @@ int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, i

 int ReverseSequenceCPUKernel::ReSize() {
  auto input0 = in_tensors_.at(0);
  auto input1 = in_tensors_.at(1);
  auto output = out_tensors_.at(0);
  MS_ASSERT(input0 != nullptr);
  MS_ASSERT(input1 != nullptr);
  MS_ASSERT(output != nullptr);

  auto para = reinterpret_cast<ReverseSequenceParameter *>(op_parameter_);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
@@ -91,14 +91,14 @@ int ScaleCPUKernel::InitParameter() {
  for (int i = 0; i < scale_param_->axis_; i++) {
    scale_param_->outer_size_ *= in_shape[i];
  }
  for (int i = 0; i < scale_shape.size(); i++) {
  for (size_t i = 0; i < scale_shape.size(); i++) {
    if (in_shape[i + scale_param_->axis_] != scale_shape[i]) {
      MS_LOG(ERROR) << "Scale tensor shape is incorrect.";
      return RET_ERROR;
    }
    scale_param_->axis_size_ *= in_shape[i + scale_param_->axis_];
  }
  for (int i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) {
  for (size_t i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) {
    scale_param_->inner_size_ *= in_shape[i];
  }
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc
@@ -68,7 +68,7 @@ int ScatterNDCPUKernel::ReSize() {
  // check consistency of the shape indices and shape
  auto update_rank = static_cast<int>(update->shape().size());
  auto indices_shape = indices->shape();
  if (update_rank != indices->shape().size() - 1 + shape_rank - indice_unit_rank) {
  if (update_rank != static_cast<int>(indices->shape().size() - 1 + shape_rank - indice_unit_rank)) {
    MS_LOG(ERROR) << "Update, shape rank and indices rank inconsistent.";
    return RET_ERROR;
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/shape.cc
@@ -51,7 +51,7 @@ int ShapeCPUKernel::Run() {
    return RET_ERROR;
  }

  for (int i = 0; i < in_tensor->shape().size(); i++) {
  for (size_t i = 0; i < in_tensor->shape().size(); i++) {
    reinterpret_cast<int *>(out_tensor->Data())[i] = in_tensor->shape()[i];
  }

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc
@@ -42,7 +42,7 @@ int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
 int SliceCPUKernel::ReSize() {
  auto *param = reinterpret_cast<SliceParameter *>(op_parameter_);
  auto input_shape = in_tensors_[0]->shape();
  if (input_shape.size() != param->param_length_) {
  if (static_cast<int>(input_shape.size()) != param->param_length_) {
    MS_LOG(ERROR) << "Input begin's lenth " << param->param_length_ << "is not equal to input shape size "
                  << input_shape.size();
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.h
@@ -42,8 +42,8 @@ class SparseToDenseCPUKernel : public LiteKernel {
  int DoExcute(int task_id);

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  SparseToDenseParameter *s2d_param_;

 private:
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc
@@ -38,7 +38,7 @@ int TopKCPUKernel::ReSize() {
  TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
  parameter->last_dim_size_ = input->shape()[input->shape().size() - 1];
  parameter->loop_num_ = 1;
  for (int i = 0; i < input->shape().size() - 1; ++i) {
  for (size_t i = 0; i < input->shape().size() - 1; ++i) {
    parameter->loop_num_ *= input->shape()[i];
  }
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack.cc
@@ -42,10 +42,10 @@ int UnstackCPUKernel::ReSize() {
  if (para->axis_ < 0) {
    para->axis_ += shape_size;
  }
  for (size_t i = 0; i < shape_size; i++) {
    if (i < para->axis_) {
  for (size_t i = 0; i < static_cast<size_t>(shape_size); i++) {
    if (static_cast<int>(i) < para->axis_) {
      para->pre_dims_ *= input->DimensionSize(i);
    } else if (i > para->axis_) {
    } else if (static_cast<int>(i) > para->axis_) {
      para->after_dims_ *= input->DimensionSize(i);
    } else {
      para->axis_dim_ = input->DimensionSize(i);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/where.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where.h
@@ -42,8 +42,8 @@ class WhereCPUKernel : public LiteKernel {
  int DoExcute(int task_id);

 protected:
  int thread_count_;
  const Context *ctx_;
  int thread_count_;
  WhereParameter *where_param_;

 private:
--- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc
@@ -60,7 +60,7 @@ int ArithmeticSelfInt8CPUKernel::Init() {

 // Recomputes the per-task work split from the first input tensor and always
 // returns RET_OK.
 // NOTE(review): this text is a diff hunk with the +/- markers stripped, so the
 // two consecutive thread_sz_count_ assignments below look like the pre- and
 // post-change forms of a single statement; only the second (with the explicit
 // cast to int) appears to be the intended final code — confirm against the commit.
 int ArithmeticSelfInt8CPUKernel::ReSize() {
  // Element count is taken from input tensor 0.
  data_size_ = in_tensors_[0]->ElementsNum();
  // MSMIN is presumably a min() macro: never plan more tasks than elements.
  thread_sz_count_ = MSMIN(thread_count_, data_size_);
  thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_));
  // UP_DIV is presumably ceiling division: elements handled by each task.
  // NOTE(review): if data_size_ is 0, thread_sz_count_ becomes 0 and this would
  // presumably divide by zero — confirm how UP_DIV is defined.
  thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_);
  return RET_OK;
 }
@@ -76,7 +76,7 @@ int ArithmeticSelfInt8Runs(int task_id, LiteParallelGroupEnv *penv, void *cdata)
 }

 int ArithmeticSelfInt8CPUKernel::DoArithmeticSelf(int task_id) {
  int size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_);
  int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
  if (size <= 0) {
    return RET_OK;
  }
--- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h
@@ -93,13 +93,13 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
  int DoArithmeticSelf(int task_id);

 private:
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  size_t data_size_;
  ArithmeticSelfParameter *para_;
  ArithmeticSelfInt8Run arithmeticSelf_run_;
  const Context *ctx_;
  int thread_count_;
  int8_t *in_ptr_;
  int8_t *out_ptr_;
 };
--- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
@@ -143,7 +143,7 @@ int BatchnormInt8CPUKernel::Init() {
  auto n_dim = input_shapes.size();
  batchnorm_param_->channel_ = input_shapes[n_dim - 1];
  batchnorm_param_->units_ = 1;
  for (int i = 0; i < n_dim - 1; i++) {
  for (size_t i = 0; i < n_dim - 1; i++) {
    batchnorm_param_->units_ *= input_shapes[i];
  }
  batchnorm_param_->op_parameter_.thread_num_ =
@@ -169,7 +169,7 @@ int BatchnormInt8CPUKernel::Init() {
 int BatchnormInt8CPUKernel::ReSize() {
  auto input_shapes = in_tensors_[0]->shape();
  batchnorm_param_->unit_ = 1;
  for (int i = 0; i < input_shapes.size() - 1; i++) {
  for (size_t i = 0; i < input_shapes.size() - 1; i++) {
    batchnorm_param_->unit_ *= input_shapes[i];
  }
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.cc
@@ -36,7 +36,7 @@ int BiasAddInt8CPUKernel::ReSize() {
  auto bias_param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
  auto dims = in_tensors_[0]->shape();
  bias_param->ndim_ = dims.size();
  for (int i = 0; i < bias_param->ndim_; i++) {
  for (size_t i = 0; i < bias_param->ndim_; i++) {
    bias_param->in_shape0_[i] = dims[i];
    bias_param->in_shape1_[i] = 1;
    bias_param->out_shape_[i] = dims[i];
--- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
@@ -65,9 +65,6 @@ int ConcatInt8CPUKernel::ReSize() {
  if (ret != RET_OK) {
    return ret;
  }
  if (concat_param_->input_shapes_ != nullptr) {
    //    free(concat_param_->input_shapes_);
  }
  auto input_num = in_tensors_.size();
  concat_param_->input_num_ = input_num;
  concat_param_->input_shapes_ = reinterpret_cast<const int **>(malloc(sizeof(int *) * input_num));
@@ -82,7 +79,7 @@ int ConcatInt8CPUKernel::ReSize() {

  int64_t after_axis_size = 1;
  auto output_tensor = out_tensors_.at(kOutputIndex);
  int output_dim = output_tensor->shape().size();
  size_t output_dim = output_tensor->shape().size();
  concat_param_->output_shapes_ = output_tensor->shape().data();
  for (size_t i = axis_ + 1; i < output_dim; i++) {
    after_axis_size *= concat_param_->output_shapes_[i];
@@ -102,7 +99,7 @@ int ConcatInt8CPUKernel::Run() {
  count_unit_ = thread_count_ > 1 ? UP_DIV(before_axis_size, thread_count_) : before_axis_size;
  concat_param_->count_unit_ = count_unit_;

  for (size_t i = 0; i < input_num; i++) {
  for (int i = 0; i < input_num; i++) {
    input_data_[i] = static_cast<int8_t *>(in_tensors_.at(i)->Data());
  }
  output_data_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data());
--- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
@@ -122,7 +122,7 @@ int DeConvInt8CPUKernel::InitParam() {

  /* optimize normal -> same data layout */
  input_trans_func_ = RowMajor2Row16x4MajorInt8;
  size_t oc4 = UP_DIV(conv_param_->output_channel_, C4NUM);
  int oc4 = UP_DIV(conv_param_->output_channel_, C4NUM);
  thread_count_ = MSMIN(op_parameter_->thread_num_, oc4);
  thread_stride_ = UP_DIV(oc4, thread_count_);
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc
@@ -38,7 +38,7 @@ int MatmulInt8CPUKernel::ReSize() {
  int batch = 1;
  auto x_shape = in_tensors_[0]->shape();
  auto o_shape = out_tensors_[0]->shape();
  for (int i = 0; i < x_shape.size() - 2; ++i) {
  for (size_t i = 0; i < x_shape.size() - 2; ++i) {
    batch *= x_shape[i];
  }
  params_->batch = batch;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/prelu_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/prelu_int8.cc
@@ -57,6 +57,7 @@ int PreluInt8CPUKernel::ReSize() {
  quant_prelu_parm_->element_num = in_tensors_[0]->Size();
  quant_prelu_parm_->in_shape_ = input_tensor->shape().data();
  quant_prelu_parm_->out_shape_ = out_tensor->shape().data();
  return RET_OK;
 }

 int PreluInt8CPUKernel::Run() {
--- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc
@@ -184,8 +184,8 @@ int ReduceInt8CPUKernel::MallocTmpBuffer() {
  for (auto i = 0; i < num_axes_ - 1; i++) {
    int axis = axes_[i];
    size_t size = 1;
    for (auto j = 0; j < input_shape.size(); j++) {
      if (static_cast<size_t>(axis) != j) {
    for (size_t j = 0; j < input_shape.size(); j++) {
      if (axis != static_cast<int>(j)) {
        size *= input_shape[j];
      }
    }
@@ -258,7 +258,7 @@ int ReduceInt8CPUKernel::Run() {
  tmp_shape_ = in_tensors_.at(0)->shape();
  src_data_ = begin_src_data_;

  for (int i = 0; i < data_buffers_.size(); ++i) {
  for (size_t i = 0; i < data_buffers_.size(); ++i) {
    if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
      quant_arg_.mean_multiplier_ = mean_multipliers_[i]->multiplier_;
      quant_arg_.mean_left_shift_ = mean_multipliers_[i]->left_shift_;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
@@ -133,7 +133,7 @@ int SqueezeInt8CPUKernel::Run() {
    auto input_type = in_tensors_[i]->data_type();
    if (input_type == kNumberTypeUInt8) {
      uint8_t *input_tmp = reinterpret_cast<uint8_t *>(in_tensors_[i]->Data());
      for (size_t j = 0; j < input_size; j++) {
      for (int j = 0; j < input_size; j++) {
        inputs_array[i][j] = (int8_t)(input_tmp[j] - 128);
      }
      for (size_t j = 0; j < input_dim; j++) {
@@ -148,12 +148,12 @@ int SqueezeInt8CPUKernel::Run() {
  auto output_type = out_tensors_[0]->data_type();
  if (output_type == kNumberTypeUInt8) {
    auto output_size = quant_Squeeze_parm_->output_size_;
    for (size_t i = 0; i < output_size; i++) {
    for (int i = 0; i < output_size; i++) {
      output_addr[i] = (uint8_t)(output_addr[i] + 128);
    }
  }

  for (int i = 0; i < input_dim; i++) {
  for (size_t i = 0; i < input_dim; i++) {
    free(*(inputs_array + i));
  }

--- a/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc
@@ -40,7 +40,7 @@ int TopKInt8CPUKernel::ReSize() {
  lite::tensor::Tensor *input = in_tensors_.at(0);
  parameter->last_dim_size_ = input->shape()[input->shape().size() - 1];
  parameter->loop_num_ = 1;
  for (int i = 0; i < input->shape().size() - 1; ++i) {
  for (size_t i = 0; i < input->shape().size() - 1; ++i) {
    parameter->loop_num_ *= input->shape()[i];
  }
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.h
@@ -44,13 +44,13 @@ class Unsqueezeint8CPUKernel : public LiteKernel {
 private:
  UnSqueezeQuantArg *quant_Unsqueeze_parm_;
  UnSqueezeParameter *Unsq_para_;
  int thread_count_;
  int thread_sz_count_;
  int thread_sz_stride_;
  int data_size_;
  float *in_ptr_;
  float *out_ptr_;
  const Context *ctx_;
  int thread_count_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.c
@@ -61,7 +61,7 @@ void ComputeStrides(int *shape, int *strides, int ndim) {
 }

 void CalcMultiplesAndStrides(ArithmeticParameter *param) {
  for (auto i = 0; i < param->ndim_; i++) {
  for (size_t i = 0; i < param->ndim_; i++) {
    param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i];
    param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i];
  }
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.c
@@ -50,7 +50,6 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i
                         const int *crops, int data_size) {
  int block_h = block[0];
  int block_w = block[1];
  int in_n = in_shape[0];
  int in_h = in_shape[1];
  int in_w = in_shape[2];
  int in_c = in_shape[3];
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c
@@ -69,7 +69,7 @@ int DeConvPostFp32C8x8(const float *src, float *tmp, const float *bias, float *d
            int src_index = ih * src_ih_stride + iw * src_iw_stride + kh * src_kh_stride + kw * src_kw_stride;
            int dst_index = oh * dst_oh_stride + ow * dst_ow_stride + kh * dst_kh_stride + kw * dst_kw_stride;
            float *tmp_dst = dst_ptr + dst_index;
            float *tmp_src = src_ptr + src_index;
            const float *tmp_src = src_ptr + src_index;
 #ifdef ENABLE_ARM64
            asm volatile(
              "mov x0, %[tmp_src] \n"
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c
@@ -35,8 +35,6 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar
  int scale = param->scale_;
  int pooled_height = param->pooledH_;
  int pooled_width = param->pooledW_;
  int *in_strides = &(param->in_strides_);
  int *out_strides = &(param->out_strides_);
  int roi_stride = 5;
  int roi_ind_st = roi_st * roi_stride;
  float *max_c = malloc(channels_ * sizeof(float));
@@ -55,9 +53,8 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar

    float bin_size_h = (float)roi_height / (float)pooled_height;
    float bin_size_w = (float)roi_width / (float)pooled_width;
    float *batch_data = in_ptr + in_strides[kNHWC_N] * roi_batch_ind;
    float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind;

    int out_ind = i * out_strides[0];
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = (int)floorf(ph * bin_size_h);     // block xi_1
@@ -76,17 +73,17 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar
            max_c[j] = 0;
          }
        }
        int pooled_index = i * out_strides[0] + ph * out_strides[1] + pw * out_strides[2];
        int bd_index = hstart * in_strides[1];
        int pooled_index = i * param->out_strides_[0] + ph * param->out_strides_[1] + pw * param->out_strides_[2];
        int bd_index = hstart * param->in_strides_[1];
        for (int h = hstart; h < hend; ++h) {
          int wi = bd_index + wstart * in_strides[2];
          int wi = bd_index + wstart * param->in_strides_[2];
          for (int w = wstart; w < wend; ++w) {
            for (int c = 0; c < channels_; ++c) {
              max_c[c] = MSMAX(batch_data[wi + c], max_c[c]);
            }
            wi += in_strides[2];
            wi += param->in_strides_[2];
          }  // in_w end;
          bd_index += in_strides[1];
          bd_index += param->in_strides_[1];
        }  // in_h end
        for (int j = 0; j < channels_; ++j) {
          out_ptr[pooled_index + j] = max_c[j];
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/space_to_batch.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/space_to_batch.c
@@ -141,7 +141,7 @@ int SpaceToBatch(const float *input, float *output, SpaceToBatchParameter param,
  if (input == NULL || output == NULL) {
    return NNACL_NULL_PTR;
  }
  auto ret =
  int ret =
    SpaceToBatchForNHWC(input, output, param.padded_in_shape_, param.n_dims_, param.block_sizes_, h_start, h_end);
  return ret;
 }
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batch_to_space_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batch_to_space_int8.c
@@ -58,7 +58,6 @@ void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_
                             const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int block_h = block[0];
  int block_w = block[1];
  int in_n = in_shape[0];
  int in_h = in_shape[1];
  int in_w = in_shape[2];
  int in_c = in_shape[3];
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/crop_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/crop_int8.c
@@ -31,7 +31,7 @@ void Crop(const int8_t *input, int8_t *output, int task_id, CropParameter *para)
      Crop3D(input, output, task_id, para);
      break;
    case 4:
      Crop4D(input, output, task_id, para);
      Int8Crop4D(input, output, task_id, para);
      break;
  }
 }
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reduce_int8.c
@@ -266,7 +266,7 @@ int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis
      }
      int32_t tmp_scaled =
        RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
                              (tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_ + base_offset),
                              (tmp - quant->in_zp_) * (1 << ((unsigned int)quant->in_out_left_shift_ + base_offset)),
                              quant->in_out_multiplier_),
                            quant->in_out_right_shift_ + base_offset);
      if (isAddOverflow(tmp_scaled, quant->out_zp_)) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/resize.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/resize.c
@@ -33,14 +33,13 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int

  int32_t new_height = output_shape[1];
  int32_t new_width = output_shape[2];
  int32_t height_scale, width_scale;
  int32_t height_scale = 0, width_scale = 0;
  ComputeScale(in_h, new_height, align_corners, &height_scale);
  ComputeScale(in_w, new_width, align_corners, &width_scale);

  int n, h, w, c;
  for (n = 0; n < in_n; n++) {
    for (h = tid; h < new_height; h += thread_num) {
      //      float actual_y = (float)h * height_scale;
      const int base_offset = 20;
      int scaled_actual_y;
      int bottom, top;
@@ -99,10 +98,10 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat

  for (batch = 0; batch < output_shape[0]; batch++) {
    for (y = tid; y < output_shape[1]; y += thread_num) {
      int input_y;
      int input_y = 0;
      ComputeNearestNeighborInt(y, in_h, new_height, align_corners, &input_y);
      for (x = 0; x < output_shape[2]; x++) {
        int input_x;
        int input_x = 0;
        ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x);
        int in_offset = offset(input_shape, batch, input_y, input_x, 0);
        int out_offset = offset(output_shape, batch, y, x, 0);
@@ -159,10 +158,10 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con

  for (batch = 0; batch < output_shape[0]; batch++) {
    for (y = tid; y < output_shape[1]; y += thread_num) {
      int input_y;
      int input_y = 0;
      ComputeNearestNeighborInt(y, in_h, new_height, align_corners, &input_y);
      for (x = 0; x < output_shape[2]; x++) {
        int input_x;
        int input_x = 0;
        ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x);
        for (c = 0; c < output_shape[3]; c++) {
          int in_offset = offset(input_shape, batch, input_y, input_x, c);
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
@@ -961,7 +961,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
 #endif
      }
      for (; c < channel; c++) {
        float *src_ptr = src_batch + hw * channel + c;
        const float *src_ptr = src_batch + hw * channel + c;
        float *dst_ptr = dst_batch + c * plane + hw;
        for (size_t i = 0; i < C8NUM; i++) {
          dst_ptr[i] = src_ptr[i * channel];
@@ -969,7 +969,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
      }
    }
    for (; hw < plane; hw++) {
      float *src_ptr = src_batch + hw * channel;
      const float *src_ptr = src_batch + hw * channel;
      float *dst_ptr = dst_batch + hw;
      for (size_t i = 0; i < channel; i++) {
        dst_ptr[i * plane] = src_ptr[i];
@@ -1023,10 +1023,10 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter
  int unit = conv_param->input_h_ * conv_param->input_w_;

  for (int b = 0; b < conv_param->input_batch_; b++) {
    int8_t *src_b = src + b * unit * conv_param->input_channel_;
    const int8_t *src_b = src + b * unit * conv_param->input_channel_;
    int16_t *dst_b = dst + b * unit * ic4 * C4NUM;
    for (int k = 0; k < unit; k++) {
      int8_t *src_k = src_b + k * conv_param->input_channel_;
      const int8_t *src_k = src_b + k * conv_param->input_channel_;
      int16_t *dst_k = dst_b + k * ic4 * C4NUM;
      for (int c = 0; c < conv_param->input_channel_; c++) {
        dst_k[c] = (int16_t)(src_k[c] - input_zp);
@@ -1044,10 +1044,10 @@ void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight
    }
    int c4_block_num = c / C4NUM;
    int c4_block_rem = c % C4NUM;
    int8_t *src_c = origin_weight + c * unit;
    const int8_t *src_c = origin_weight + c * unit;
    int16_t *dst_c = packed_weight_ + c4_block_num * unit * C4NUM;
    for (int k = 0; k < unit; k++) {
      int8_t *src_kernel = src_c + k;
      const int8_t *src_kernel = src_c + k;
      int16_t *dst_kernel = dst_c + C4NUM * k + c4_block_rem;
      *dst_kernel = (int16_t)(src_kernel[0] - weight_zp);
    }
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.c
@@ -30,14 +30,14 @@ void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantiz
  if (quantized_multiplier == NULL || right_shift == NULL) {
    return;
  }
  int shift;
  int shift = 0;
  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  *right_shift = -shift;
 }

 void QuantizeRoundParameter(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,
                            int *right_shift) {
  int shift;
  int shift = 0;
  QuantizeMultiplierSmallerThanOne(double_multiplier, quantized_multiplier, &shift);
  shift = -shift;
  if (shift < 0) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
@@ -913,7 +913,7 @@ void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weigh
    int src_oc_offset = o * iC8 * C8NUM * kernel_plane;
    int dst_oc_offset = oc4_block_num * C4NUM * iC8 * C8NUM * input_unit * input_unit + oc4_block_rem;
    for (int i = 0; i < iC8; i++) {
      int16_t *src_ic8_ptr = weight_data + src_oc_offset + i * kernel_plane * C8NUM;
      const int16_t *src_ic8_ptr = weight_data + src_oc_offset + i * kernel_plane * C8NUM;
      int16_t *dst_ic8_ptr = trans_weight + dst_oc_offset + i * C4NUM * C8NUM;
 #ifdef ENABLE_ARM
      int16x8_t g00 = vld1q_s16(src_ic8_ptr);
@@ -1107,7 +1107,7 @@ void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weigh
      dst_ic8_ptr[28 + 15 * dst_step] = m33[7];
 #else
      for (int j = 0; j < C8NUM; j++) {
        int16_t *local_ptr = src_ic8_ptr + j;
        const int16_t *local_ptr = src_ic8_ptr + j;
        int16_t dst00 = local_ptr[0] * 2;
        int16_t dst01 = (local_ptr + 8)[0] * 2;
        int16_t dst02 = (local_ptr + 16)[0] * 2;
--- a/mindspore/lite/src/runtime/parallel_executor.cc
+++ b/mindspore/lite/src/runtime/parallel_executor.cc
@@ -29,6 +29,7 @@ int ParallelExecutor::Prepare(std::vector<mindspore::kernel::LiteKernel *> &kern
  for (mindspore::kernel::LiteKernel *kernel : kernels) {
    refCount[kernel] = kernel->out_kernels().size();
  }
  return RET_OK;
 }

 void ParallelExecutor::PrepareReadyKernels(const std::vector<mindspore::kernel::LiteKernel *> &kernels) {
--- a/mindspore/lite/src/runtime/thread_pool.cc
+++ b/mindspore/lite/src/runtime/thread_pool.cc
@@ -235,17 +235,17 @@ bool ThreadPool::SetThreadPool() {
  } else if (localMaxThreadNums > kDefaultMaxThreadNums) {
    localMaxThreadNums = kDefaultMaxThreadNums;
  }
  if (configThreadNums > kDefaultMaxThreadNums) {
  if (configThreadNums > static_cast<int>(kDefaultMaxThreadNums)) {
    configThreadNums = kDefaultMaxThreadNums;
  }
  int addNum = 0;
  if (configThreadNums > kDefaultMaxThreadNums) {
  if (configThreadNums > static_cast<int>(kDefaultMaxThreadNums)) {
    addNum = configThreadNums - curThreadRunNums;
  } else if (localMaxThreadNums > curThreadNums) {
  } else if (static_cast<int>(localMaxThreadNums) > curThreadNums) {
    addNum = localMaxThreadNums - curThreadNums;
  }
  AddNewThread(addNum);
  if (curThreadRunNums > localMaxThreadNums) {
  if (curThreadRunNums > static_cast<int>(localMaxThreadNums)) {
    SubRunThread(localMaxThreadNums);
  } else {
    AddRunThread(localMaxThreadNums);
@@ -376,7 +376,7 @@ bool ThreadPool::DistributeTask(ThreadPoolTask *task, int numTask) {

 void ThreadPool::AddRunThread(int num) {
  int activeNums = num - curThreadRunNums;
  if (activeNums <= 0 || activateList.size() < activeNums) {
  if (activeNums <= 0 || static_cast<int>(activateList.size()) < activeNums) {
    return;
  }
  for (int i = curThreadRunNums - 1, j = 0; j < activeNums; ++i, ++j) {
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -6,6 +6,10 @@ include_directories(${TOP_DIR})
 include_directories(${TEST_DIR})
 include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake)

 # Adjust the compiler flags inherited from the parent scope for the test build:
 # drop -Werror, and turn hidden symbol visibility into default visibility.
 string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 # The pattern must carry the leading '-' of the flag; without it the pattern
 # " fvisibility=hidden " can never match "-fvisibility=hidden" and the
 # replacement silently does nothing.
 string(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 string(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 ### anf src
 set(ANF_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/../../core/ir/meta_tensor.cc
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -28,6 +28,10 @@

 namespace mindspore {
 namespace lite {
 static const char *DELIM_COLON = ":";
 static const char *DELIM_COMMA = ",";
 static const char *DELIM_SLASH = "/";

 int Benchmark::GenerateRandomData(size_t size, void *data) {
  MS_ASSERT(data != nullptr);
  char *castedData = static_cast<char *>(data);
@@ -85,7 +89,7 @@ int Benchmark::ReadInputFile() {
    MS_LOG(ERROR) << "Not supported image input";
    return RET_ERROR;
  } else {
    for (auto i = 0; i < _flags->input_data_list.size(); i++) {
    for (size_t i = 0; i < _flags->input_data_list.size(); i++) {
      auto cur_tensor = msInputs.at(i);
      MS_ASSERT(cur_tensor != nullptr);
      size_t size;
--- a/mindspore/lite/tools/common/graph_util.cc
+++ b/mindspore/lite/tools/common/graph_util.cc
@@ -35,16 +35,6 @@ OpDefCopyer GetSimpleOpCopyer() {
    newCNode->quantType = inCNode->quantType;
    newCNode->primitive = std::make_unique<schema::PrimitiveT>();
    newCNode->primitive->value.type = inCNode->primitive->value.type;
    //    newCNode->quantParam.clear();
    //    for (size_t i = 0; i < inCNode->quantParam.size(); i++) {
    //      auto &quantParam = inCNode->quantParam.at(i);
    //      auto quantParamCopy = CopyQuantParamArrayT(quantParam);
    //      if (quantParamCopy == nullptr) {
    //        //MS_LOG(ERROR)("CopyQuantParamArray return nullptr, node: %s", inOpDef->name.c_str());
    //        return nullptr;
    //      }
    //      newCNode->quantParam.emplace_back(std::move(quantParamCopy));
    //    }
    return std::move(newCNode);
  };
 }
@@ -139,20 +129,18 @@ STATUS IsolateNode(schema::MetaGraphT *graphT, CNodeT *node) {
  auto inputTensorIdxes = node->inputIndex;
  auto outputTensorIdxes = node->outputIndex;
  if (inputTensorIdxes.empty()) {
    // MS_LOG(ERROR)("Node %s should has no inputs", node->name.c_str());
    MS_LOG(ERROR) << "Node " << node->name.c_str() << "should has no inputs";
    return RET_ERROR;
  }
  if (outputTensorIdxes.size() != 1) {
    // MS_LOG(ERROR)("FakeQuantNode %s should has 1 output, in fact: %zu", node->name.c_str(),
    // outputTensorIdxes.size());
    MS_LOG(ERROR) << "FakeQuantNode " << node->name.c_str() \
                  << "should has 1 output, in fact: " << outputTensorIdxes.size();
    return RET_ERROR;
  }
  auto inDataTensorIdx = inputTensorIdxes.front();
  auto outDataTensorIdx = outputTensorIdxes.front();

  MS_ASSERT(graphT->allTensors.size() > inDataTensorIdx);
  const auto &inDataTensor = graphT->allTensors.at(inDataTensorIdx);
  MS_ASSERT(inDataTensor != nullptr);
  auto &gOutTensorIdx = graphT->outputIndex;
  for (auto iter = gOutTensorIdx.begin(); iter != gOutTensorIdx.end(); iter++) {
    if (*iter == outDataTensorIdx) {
@@ -186,20 +174,13 @@ STATUS IsolateNode(schema::MetaGraphT *graphT, CNodeT *node) {

 // Overload that additionally accepts a sub-graph index.
 // subGraphIdx is not consulted here: the request is forwarded unchanged to the
 // (graph, nodeIdx, removeTensor) overload and its status is returned as-is.
 STATUS IsolateOneWayNode(schema::MetaGraphT *graph, size_t subGraphIdx, size_t nodeIdx, bool removeTensor) {
  MS_ASSERT(graph != nullptr);
  const STATUS forwardedStatus = IsolateOneWayNode(graph, nodeIdx, removeTensor);
  return forwardedStatus;
 }

 STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool removeTensor) {
  MS_ASSERT(graphT != nullptr);
  if (graphT->nodes.size() <= nodeIdx) {
    // MS_LOG(ERROR)("nodeIdx out of range: %zu", nodeIdx);
    MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx;
    return RET_PARAM_INVALID;
  }

@@ -208,11 +189,11 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool remove
  auto outputTensorIdxes = node->outputIndex;
  auto preNodeIdxes = GetInputNodeIdx(*graphT, nodeIdx);
  if (preNodeIdxes.size() > 1 || outputTensorIdxes.size() > 1) {
    // MS_LOG(ERROR)("Only support node who has no more than one input and one output");
    MS_LOG(ERROR) << "Only support node who has no more than one input and one output";
    return RET_ERROR;
  }
  if (inputTensorIdxes.empty()) {
    // MS_LOG(ERROR)("Error, %zuth node has no input tensor", nodeIdx);
    MS_LOG(ERROR) << "Error, " << nodeIdx << "th node has no input tensor";
    return RET_ERROR;
  }
  auto inDataTensorIdx = inputTensorIdxes.front();
@@ -247,7 +228,7 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool remove
    // remove all node's outputTensors
    auto status = RemoveTensor(graphT, outputTensorIdxes);
    if (status != RET_OK) {
      // MS_LOG(ERROR)("RemoveOutputTensors of node %s failed", node->name.c_str());
      MS_LOG(ERROR) << "RemoveOutputTensors of node " << node->name.c_str() << "failed";
      return RET_ERROR;
    }
  }
@@ -270,7 +251,7 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, CNodeT *node, bool removeTe
    }
  }
  if (!isSubNode) {
    // MS_LOG(ERROR)("Node %s is not in graphT %s", node->name.c_str(), graphT->name.c_str());
    MS_LOG(ERROR) << "Node " << node->name.c_str() << "is not in graphT " << graphT->name.c_str();
    return RET_PARAM_INVALID;
  } else {
    return IsolateOneWayNode(graphT, nodeIdx, removeTensor);
@@ -343,7 +324,7 @@ STATUS UpdateNodeIndex(CNodeT *node, uint32_t deleteIdx) {
 STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_ptr<TensorT> tensor,
                      InsertPlace place) {
  if (nodeIdx >= graphT->nodes.size()) {
    // MS_LOG(ERROR)("nodeIdx out of range: %du", nodeIdx);
    MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx;
    return RET_PARAM_INVALID;
  }
  graphT->allTensors.emplace_back(std::move(tensor));
@@ -360,16 +341,16 @@ STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_
 STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_t inTensorIdx,
                           std::unique_ptr<TensorT> tensor) {
  if (nodeIdx >= graphT->nodes.size()) {
    // MS_LOG(ERROR)("nodeIdx out of range: %du", nodeIdx);
    MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx;
    return RET_PARAM_INVALID;
  }
  auto node = graphT->nodes.at(nodeIdx).get();
  if (inTensorIdx >= graphT->allTensors.size()) {
    // MS_LOG(ERROR)("inTensorIdx out of range: %du", nodeIdx);
    MS_LOG(ERROR) << "inTensorIdx out of range: " << nodeIdx;
    return RET_PARAM_INVALID;
  }
  if (!IsContain(node->inputIndex, inTensorIdx)) {
    // MS_LOG(ERROR)("inTensorIdx(%du) is not a inputIdx of node(%du)", inTensorIdx, nodeIdx);
    MS_LOG(ERROR) << "inTensorIdx(" << inTensorIdx << ") is not a inputIdx of node(" << nodeIdx << ")";
    return RET_PARAM_INVALID;
  }
  graphT->allTensors.at(inTensorIdx).swap(tensor);
@@ -379,7 +360,7 @@ STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_
 NodeIter InsertNode(schema::MetaGraphT *graphT, uint32_t existNodeIdx, InsertPlace place, size_t inoutIndex,
                    std::unique_ptr<CNodeT> toAddNode, STATUS *errorCode, OpDefCopyer opDefCopyer) {
  if (existNodeIdx >= graphT->nodes.size()) {
    // MS_LOG(ERROR)("nodeIdx out of range: %du", existNodeIdx);
    MS_LOG(ERROR) << "nodeIdx out of range: " << existNodeIdx;
    return graphT->nodes.end();
  }
  auto nodeIter = graphT->nodes.begin() + existNodeIdx;
@@ -447,17 +428,14 @@ NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, si
    existNodeIter++;
  } else {
    std::vector<std::unique_ptr<CNodeT>> toAddNodes;
    int i = 0;
    for (size_t preNodeIdx : preNodeIdxes) {
      MS_ASSERT(graphT->nodes.size() > preNodeIdx);
      auto &preNode = graphT->nodes.at(preNodeIdx);
      MS_ASSERT(preNode != nullptr);
    for (size_t i = 0; i < preNodeIdxes.size(); i++) {
      MS_ASSERT(graphT->nodes.size() > preNodeIdxes.at(i));
      auto &preTensor = graphT->allTensors.at(preTensorIdx);
      MS_ASSERT(preTensor != nullptr);
      auto toAddTensor = CopyTensorDefT(preTensor);
      if (toAddTensor == nullptr) {
        *errorCode = RET_NULL_PTR;
        // MS_LOG(ERROR)("Copy TensorT failed");
        MS_LOG(ERROR) << "Copy TensorT failed";
        return graphT->nodes.end();
      }
      if (toAddNodeIn->primitive->value.type == schema::PrimitiveType_QuantDTypeCast) {
@@ -468,7 +446,7 @@ NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, si
      size_t toAddTensorIdx = graphT->allTensors.size() - 1;
      auto toAddNode = opDefCopyer(toAddNodeIn.get());
      if (toAddNode == nullptr) {
        // MS_LOG(ERROR)("copy toAddNodeIn failed");
        MS_LOG(ERROR) << "copy toAddNodeIn failed";
        *errorCode = RET_NULL_PTR;
        return graphT->nodes.end();
      }
@@ -509,7 +487,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
    MS_ASSERT(postTensor != nullptr);
    auto toAddTensor = CopyTensorDefT(postTensor);
    if (toAddTensor == nullptr) {
      // MS_LOG(ERROR)("Copy TensorT failed");
      MS_LOG(ERROR) << "Copy TensorT failed";
      *errorCode = RET_NULL_PTR;
      return graphT->nodes.end();
    }
@@ -521,7 +499,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
    size_t toAddTensorIdx = graphT->allTensors.size() - 1;
    auto toAddNode = opDefCopyer(toAddNodeIn.get());
    if (toAddNode == nullptr) {
      // MS_LOG(ERROR)("copy toAddNodeIn failed");
      MS_LOG(ERROR) << "copy toAddNodeIn failed";
      *errorCode = RET_NULL_PTR;
      return graphT->nodes.end();
    }
@@ -548,7 +526,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
      MS_ASSERT(postTensor != nullptr);
      auto toAddTensor = CopyTensorDefT(postTensor);
      if (toAddTensor == nullptr) {
        // MS_LOG(ERROR)("Copy TensorT failed");
        MS_LOG(ERROR) << "Copy TensorT failed";
        *errorCode = RET_NULL_PTR;
        return graphT->nodes.end();
      }
@@ -560,7 +538,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz
      size_t toAddTensorIdx = graphT->allTensors.size() - 1;
      auto toAddNode = opDefCopyer(toAddNodeIn.get());
      if (toAddNode == nullptr) {
        // MS_LOG(ERROR)("copy toAddNodeIn failed");
        MS_LOG(ERROR) << "copy toAddNodeIn failed";
        *errorCode = RET_NULL_PTR;
        return graphT->nodes.end();
      }
@@ -612,12 +590,12 @@ std::string GetModelName(const std::string &modelFile) {

 OpGraphT *OpGraphT::Build(const schema::MetaGraphT *subGraphDef) {
  if (subGraphDef == nullptr) {
    // MS_LOG(ERROR)("subGraphDef is nullptr");
    MS_LOG(ERROR) << "subGraphDef is nullptr";
    return nullptr;
  }
  auto graph = std::unique_ptr<OpGraphT>(new OpGraphT());
  if (graph == nullptr) {
    // MS_LOG(ERROR)("malloc opgraph failed");
    MS_LOG(ERROR) << "malloc opgraph failed";
    return nullptr;
  }

@@ -626,7 +604,7 @@ OpGraphT *OpGraphT::Build(const schema::MetaGraphT *subGraphDef) {
  for (auto &opDef : opDefs) {
    auto ret = graph->AddEdge(opDef.get(), &opDefs);
    if (ret != RET_OK) {
      // MS_LOG(ERROR)("%s add edge failed. ret:%d", opDef->name.c_str(), ret);
      MS_LOG(ERROR) << opDef->name.c_str() << " add edge failed. ret: " << ret;
      return nullptr;
    }
  }
@@ -644,7 +622,7 @@ int OpGraphT::AddEdge(const schema::CNodeT *srcNodeDef, const std::vector<std::u
    for (auto &dstNodeDef : *nodeDefs) {
      bool find = false;
      auto inputIndex = dstNodeDef->inputIndex;
      if (std::any_of(inputIndex.begin(), inputIndex.end(), [&index](int i) { return i == index; })) {
      if (std::any_of(inputIndex.begin(), inputIndex.end(), [&index](size_t i) { return i == index; })) {
        find = true;
      }

@@ -664,13 +642,13 @@ int OpGraphT::AddEdge(const schema::CNodeT *srcNodeDef, const std::vector<std::u
 int OpGraphT::AddEdge(NODE_ID srcId, NODE_ID dstId) {
  auto srcNode = AddNode(srcId);
  if (srcNode == nullptr) {
    // MS_LOG(ERROR)("add srcNode failed");
    MS_LOG(ERROR) << "add srcNode failed";
    return RET_ERROR;
  }
  srcNode->AddOutEdge(dstId);
  auto dstNode = AddNode(dstId);
  if (dstNode == nullptr) {
    // MS_LOG(ERROR)("add dstNode failed");
    MS_LOG(ERROR) << "add dstNode failed";
    return RET_ERROR;
  }
  dstNode->AddInEdge(srcId);
--- a/mindspore/lite/tools/common/node_util.cc
+++ b/mindspore/lite/tools/common/node_util.cc
@@ -109,12 +109,79 @@ STATUS NodeUtils::ConvertDims(mindspore::lite::Format src_format, const std::vec
      }
      break;
    default:
      // MS_LOG(ERROR)("Not support dst format: %d", dst_format);
      MS_LOG(ERROR) << "Not support dst format: " << dst_format;
      return RET_ERROR;
  }
  return RET_OK;
 }

 // Reads the K, C, H and W extents out of a 4-element filter shape.
 //
 // The source layout is selected by `type`: each group of transform types below
 // shares one source layout and therefore one set of index constants.
 //
 // Params:
 //   oriDims              - filter shape; must hold exactly 4 entries.
 //   type                 - filter transform type, used only to pick the source layout.
 //   filterK/C/H/W        - outputs; must all be non-null.
 // Returns:
 //   RET_OK on success, RET_NULL_PTR if any output pointer is null,
 //   RET_ERROR if oriDims is not 4-D or `type` is not handled.
 STATUS GetFilterDim(const std::vector<int32_t> &oriDims, kTransFilterType type, int32_t* filterK, int32_t* filterC,
                            int32_t* filterH, int32_t* filterW) {
  // Validate explicitly instead of relying on MS_ASSERT, so that a bad call is
  // reported through the status code rather than dereferencing a null pointer.
  if (filterK == nullptr || filterC == nullptr || filterH == nullptr || filterW == nullptr) {
    MS_LOG(ERROR) << "GetFilterDim output pointer is nullptr";
    return RET_NULL_PTR;
  }
  if (oriDims.size() != 4) {
    MS_LOG(ERROR) << "Filter dims size should be 4, in fact: " << oriDims.size();
    return RET_ERROR;
  }
  if (type == kKCHW2HWCK || type == kKCHW2HWKC || type == kKCHW2KHWC || type == kKCHW2CKHW) {
    *filterK = oriDims.at(KCHW_K);
    *filterC = oriDims.at(KCHW_C);
    *filterH = oriDims.at(KCHW_H);
    *filterW = oriDims.at(KCHW_W);
  } else if (type == kCKHW2HWCK || type == kCKHW2HWKC || type == kCKHW2KHWC) {
    *filterC = oriDims.at(CKHW_C);
    *filterK = oriDims.at(CKHW_K);
    *filterH = oriDims.at(CKHW_H);
    *filterW = oriDims.at(CKHW_W);
  } else if (type == kHWCK2KCHW || type == kHWCK2CKHW) {
    *filterH = oriDims.at(HWCK_H);
    *filterW = oriDims.at(HWCK_W);
    *filterC = oriDims.at(HWCK_C);
    *filterK = oriDims.at(HWCK_K);
  } else if (type == kHWKC2KCHW || type == kHWKC2CKHW) {
    *filterH = oriDims.at(HWKC_H);
    *filterW = oriDims.at(HWKC_W);
    *filterK = oriDims.at(HWKC_K);
    *filterC = oriDims.at(HWKC_C);
  } else if (type == kNHWC2KCHW || type == kNHWC2HWCK || type == kNHWC2CKHW) {
    // For NHWC filters the N axis is read as the K extent.
    *filterK = oriDims.at(NHWC_N);
    *filterH = oriDims.at(NHWC_H);
    *filterW = oriDims.at(NHWC_W);
    *filterC = oriDims.at(NHWC_C);
  } else if (type == kCHWK2HWCK || type == kCHWK2KHWC) {
    *filterC = oriDims.at(CHWK_C);
    *filterH = oriDims.at(CHWK_H);
    *filterW = oriDims.at(CHWK_W);
    *filterK = oriDims.at(CHWK_K);
  } else if (type == kKHWC2HWCK || type == kKHWC2CHWK) {
    *filterK = oriDims.at(KHWC_K);
    *filterH = oriDims.at(KHWC_H);
    *filterW = oriDims.at(KHWC_W);
    *filterC = oriDims.at(KHWC_C);
  } else {
    MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
    return RET_ERROR;
  }
  return RET_OK;
 }

 // Writes the filter shape into tensor->dims, ordered for the destination layout
 // implied by `type`.
 //
 // Params:
 //   tensor               - tensor whose dims are overwritten; must be non-null.
 //   type                 - filter transform type, used only to pick the destination layout.
 //   filterK/C/H/W        - extents to store.
 // Returns:
 //   RET_OK on success, RET_NULL_PTR if tensor is null,
 //   RET_ERROR if `type` is not handled (tensor->dims is left untouched).
 STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
                            int32_t filterH, int32_t filterW) {
  // Explicit check rather than MS_ASSERT only, so a null tensor is reported
  // through the status code instead of being dereferenced.
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "SetFilterDim tensor is nullptr";
    return RET_NULL_PTR;
  }
  if (type == kKCHW2HWCK || type == kCKHW2HWCK || type == kNHWC2HWCK || type == kKHWC2HWCK || type == kCHWK2HWCK) {
    tensor->dims = {filterH, filterW, filterC, filterK};
  } else if (type == kKCHW2HWKC || type == kCKHW2HWKC) {
    tensor->dims = {filterH, filterW, filterK, filterC};
  } else if (type == kHWCK2KCHW || type == kHWKC2KCHW || type == kNHWC2KCHW) {
    tensor->dims = {filterK, filterC, filterH, filterW};
  } else if (type == kHWCK2CKHW || type == kHWKC2CKHW || type == kNHWC2CKHW || type == kKCHW2CKHW) {
    tensor->dims = {filterC, filterK, filterH, filterW};
  } else if (type == kKHWC2CHWK) {
    tensor->dims = {filterC, filterH, filterW, filterK};
  } else if (type == kKCHW2KHWC || type == kCKHW2KHWC || type == kCHWK2KHWC) {
    tensor->dims = {filterK, filterH, filterW, filterC};
  } else {
    MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
    return RET_ERROR;
  }
  return RET_OK;
 }

 STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) {
  if (tensor == nullptr) {
    return RET_NULL_PTR;
--- a/mindspore/lite/tools/common/node_util.h
+++ b/mindspore/lite/tools/common/node_util.h
@@ -75,72 +75,10 @@ enum kTransFilterType {
  kKCHW2CKHW  // 20
 };

 // Header-local helper: pulls the K/C/H/W extents out of a 4-element filter shape.
 // `type` selects which source layout (and thus which index constants) is used.
 // Returns RET_OK on success, RET_ERROR for a transform type that is not handled.
 static STATUS GetFilterDim(std::vector<int32_t> &oriDims, kTransFilterType type, int32_t &filterK, int32_t &filterC,
                            int32_t &filterH, int32_t &filterW) {
  MS_ASSERT(oriDims.size() == 4);
  switch (type) {
    // source layout: KCHW
    case kKCHW2HWCK:
    case kKCHW2HWKC:
    case kKCHW2KHWC:
    case kKCHW2CKHW:
      filterK = oriDims.at(KCHW_K);
      filterC = oriDims.at(KCHW_C);
      filterH = oriDims.at(KCHW_H);
      filterW = oriDims.at(KCHW_W);
      break;
    // source layout: CKHW
    case kCKHW2HWCK:
    case kCKHW2HWKC:
    case kCKHW2KHWC:
      filterC = oriDims.at(CKHW_C);
      filterK = oriDims.at(CKHW_K);
      filterH = oriDims.at(CKHW_H);
      filterW = oriDims.at(CKHW_W);
      break;
    // source layout: HWCK
    case kHWCK2KCHW:
    case kHWCK2CKHW:
      filterH = oriDims.at(HWCK_H);
      filterW = oriDims.at(HWCK_W);
      filterC = oriDims.at(HWCK_C);
      filterK = oriDims.at(HWCK_K);
      break;
    // source layout: HWKC
    case kHWKC2KCHW:
    case kHWKC2CKHW:
      filterH = oriDims.at(HWKC_H);
      filterW = oriDims.at(HWKC_W);
      filterK = oriDims.at(HWKC_K);
      filterC = oriDims.at(HWKC_C);
      break;
    // source layout: NHWC (the N axis is read as K)
    case kNHWC2KCHW:
    case kNHWC2HWCK:
    case kNHWC2CKHW:
      filterK = oriDims.at(NHWC_N);
      filterH = oriDims.at(NHWC_H);
      filterW = oriDims.at(NHWC_W);
      filterC = oriDims.at(NHWC_C);
      break;
    // source layout: CHWK
    case kCHWK2HWCK:
    case kCHWK2KHWC:
      filterC = oriDims.at(CHWK_C);
      filterH = oriDims.at(CHWK_H);
      filterW = oriDims.at(CHWK_W);
      filterK = oriDims.at(CHWK_K);
      break;
    // source layout: KHWC
    case kKHWC2HWCK:
    case kKHWC2CHWK:
      filterK = oriDims.at(KHWC_K);
      filterH = oriDims.at(KHWC_H);
      filterW = oriDims.at(KHWC_W);
      filterC = oriDims.at(KHWC_C);
      break;
    default:
      MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
      return RET_ERROR;
  }
  return RET_OK;
 }

 // Header-local helper: stores the filter extents into tensor->dims in the order
 // required by the destination layout that `type` implies.
 // Returns RET_OK on success, RET_ERROR for a transform type that is not handled.
 static STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
                            int32_t filterH, int32_t filterW) {
  MS_ASSERT(tensor != nullptr);
  switch (type) {
    // destination layout: HWCK
    case kKCHW2HWCK:
    case kCKHW2HWCK:
    case kNHWC2HWCK:
    case kKHWC2HWCK:
    case kCHWK2HWCK:
      tensor->dims = {filterH, filterW, filterC, filterK};
      break;
    // destination layout: HWKC
    case kKCHW2HWKC:
    case kCKHW2HWKC:
      tensor->dims = {filterH, filterW, filterK, filterC};
      break;
    // destination layout: KCHW
    case kHWCK2KCHW:
    case kHWKC2KCHW:
    case kNHWC2KCHW:
      tensor->dims = {filterK, filterC, filterH, filterW};
      break;
    // destination layout: CKHW
    case kHWCK2CKHW:
    case kHWKC2CKHW:
    case kNHWC2CKHW:
    case kKCHW2CKHW:
      tensor->dims = {filterC, filterK, filterH, filterW};
      break;
    // destination layout: CHWK
    case kKHWC2CHWK:
      tensor->dims = {filterC, filterH, filterW, filterK};
      break;
    // destination layout: KHWC
    case kKCHW2KHWC:
    case kCKHW2KHWC:
    case kCHWK2KHWC:
      tensor->dims = {filterK, filterH, filterW, filterC};
      break;
    default:
      MS_LOG(ERROR) << "Unsupported transFilterType: " << type;
      return RET_ERROR;
  }
  return RET_OK;
 }
 STATUS GetFilterDim(const std::vector<int32_t> &oriDims, kTransFilterType type, int32_t* filterK, int32_t* filterC,
                    int32_t* filterH, int32_t* filterW);
 STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
                    int32_t filterH, int32_t filterW);

 template <typename T>
 static STATUS TransFilterData(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC,
@@ -356,7 +294,7 @@ static STATUS TransFilterFormat(schema::TensorT *tensor, kTransFilterType type)
  int32_t filterW;
  int32_t filterC;
  int32_t filterK;
  auto status = GetFilterDim(oriDims, type, filterK, filterC, filterH, filterW);
  auto status = GetFilterDim(oriDims, type, &filterK, &filterC, &filterH, &filterW);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "GetFilterDim failed: " << status;
    return status;
--- a/mindspore/lite/tools/common/tensor_util.cc
+++ b/mindspore/lite/tools/common/tensor_util.cc
@@ -42,107 +42,6 @@ std::unique_ptr<schema::QuantParamT> CopyQuantParamT(const std::unique_ptr<schem
  return std::move(dstQuantParam);
 }

 // Allocates a new, default-constructed QuantParamT and returns ownership of it.
 //
 // NOTE: no field of srcQuantParamArray is copied into the result; the source is
 // only asserted to be non-null. The per-parameter copy existed solely as
 // commented-out code and is not implemented here.
 //
 // Returns nullptr if the allocation fails.
 std::unique_ptr<QuantParamT> CopyQuantParamArrayT(const std::unique_ptr<QuantParamT> &srcQuantParamArray) {
  MS_ASSERT(srcQuantParamArray != nullptr);
  auto dstQuantParamArrayT = std::unique_ptr<QuantParamT>(new (std::nothrow) QuantParamT());
  if (dstQuantParamArrayT == nullptr) {
    return nullptr;
  }
  // Return the local by name: it is moved (or elided) implicitly, whereas an
  // explicit std::move here would block copy elision and trigger
  // -Wpessimizing-move under -Wall -Werror.
  return dstQuantParamArrayT;
 }

 // Looks up the node linked before tensor `tensorIdx` and sanity-checks it.
 // The actual quant-param lookup is not implemented, so every path returns
 // nullptr; reaching the end with a producer present trips MS_ASSERT(false).
 std::unique_ptr<QuantParamT> GetInTensorQuantParamArray(const MetaGraphT &graphT, size_t tensorIdx) {
  auto producerIdxes = GetLinkedPreIdx(graphT, tensorIdx);
  MS_ASSERT(producerIdxes.size() <= 1);
  if (!producerIdxes.empty()) {
    auto producerIdx = producerIdxes.front();
    MS_ASSERT(producerIdx < graphT.nodes.size());
    auto &producer = graphT.nodes.at(producerIdx);
    MS_ASSERT(producer != nullptr);
    MS_ASSERT(producer->inputIndex.size() + producer->outputIndex.size() == producer->quantParam.size());
    // No matching quant param can be produced for an existing producer.
    MS_ASSERT(false);
  }
  return nullptr;
 }

 // Walks the nodes linked after tensor `tensorIdx` and sanity-checks each one.
 // The quant-param extraction itself is not implemented, so the function
 // returns nullptr whether or not any such node exists.
 std::unique_ptr<QuantParamT> GetOutTensorQuantParamArray(const MetaGraphT &graphT, size_t tensorIdx) {
  auto consumerIdxes = GetLinkedPostIdx(graphT, tensorIdx);
  // An empty list simply skips the loop below.
  for (auto consumerIdx : consumerIdxes) {
    MS_ASSERT(consumerIdx < graphT.nodes.size());
    auto &consumer = graphT.nodes.at(consumerIdx);
    MS_ASSERT(consumer != nullptr);
    MS_ASSERT(consumer->inputIndex.size() + consumer->outputIndex.size() == consumer->quantParam.size());
  }
  return nullptr;
 }

 size_t GetElementSize(const TensorT &tensor) { return GetElementSize(TypeId(tensor.dataType)); }

 size_t GetElementSize(const TypeId &dataType) {
--- a/mindspore/lite/tools/common/tensor_util.h
+++ b/mindspore/lite/tools/common/tensor_util.h
@@ -58,10 +58,6 @@ std::unique_ptr<schema::QuantParamT> CopyQuantParamT(const std::unique_ptr<schem
 std::unique_ptr<schema::QuantParamT> CopyQuantParamArrayT(
        const std::unique_ptr<schema::QuantParamT> &srcQuantParamArray);

 std::unique_ptr<schema::QuantParamT> GetInTensorQuantParamArray(const schema::MetaGraphT &graphT, size_t tensorIdx);

 std::unique_ptr<schema::QuantParamT> GetOutTensorQuantParamArray(const schema::MetaGraphT &graphT, size_t tensorIdx);

 using MSGraphDefTPtr = std::shared_ptr<schema::MetaGraphT>;

 enum TensorType { CONST = 0, GRAPH_INPUT = 1, OP_OUTPUT = 2, TF_CONST = 3 };
--- a/mindspore/lite/tools/converter/converter.cc
+++ b/mindspore/lite/tools/converter/converter.cc
@@ -37,6 +37,7 @@
 namespace mindspore {
 namespace lite {
 using FmkType = converter::FmkType;
 static const char *DELIM_SLASH = "/";
 Converter::Converter() {
  this->transform = new GraphDefTransform;
  this->anfTransform = new AnfTransform;
--- a/mindspore/lite/tools/converter/legacy_optimizer/fusion/batchnorm_fold_fusion_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/fusion/batchnorm_fold_fusion_pass.cc
@@ -333,7 +333,7 @@ STATUS BatchNormFoldFusionPass::GenNewWeightTensor() {
  void *miData = muTensor->data.data();
  auto *castedMiData = static_cast<float *>(miData);
  size_t stride = weightShapeSize / channelOut;
  for (size_t i = 0; i < channelOut; i++) {
  for (int i = 0; i < channelOut; i++) {
    for (size_t j = 0; j < stride; j++) {
      castedNewWeightData[i * stride + j] = castedOldWeightData[i * stride + j] * castedGammaData[i] / castedMiData[i];
    }
@@ -367,7 +367,7 @@ STATUS BatchNormFoldFusionPass::GenNewBiasTensor() {  // bias has no quant
  MS_ASSERT(sigmaTensor->dataType == DataType_DT_FLOAT);
  void *sigmaData = sigmaTensor->data.data();
  auto *castedSigmaData = static_cast<float *>(sigmaData);
  for (size_t i = 0; i < channelOut; i++) {
  for (int i = 0; i < channelOut; i++) {
    castedNewBiasData[i] = castedBetaData[i] - castedGammaData[i] * castedMiData[i] / castedSigmaData[i];
  }
  return RET_OK;
--- a/mindspore/lite/tools/converter/legacy_optimizer/fusion/format_trans_fusion_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/fusion/format_trans_fusion_pass.cc
@@ -19,8 +19,6 @@
 #include <memory>
 #include "tools/converter/legacy_optimizer/fusion/format_trans_fusion_pass.h"
 #include "utils/log_adapter.h"
 #include "securec/include/securec.h"
 // #include "utils/log_adapter.h"
 #include "tools/common/graph_util.h"
 #include "include/errorcode.h"
 #include "mindspore/lite/schema/inner/model_generated.h"
@@ -44,7 +42,7 @@ STATUS FormatTransFusionPass::DefinePattern() {
    std::unique_ptr<FusionPattern> nc2NhAndNh2NcFusionPattern(new (std::nothrow)
                                                                FusionPattern(kNc2NhAndNh2NcFusionPattern));
    if (nc2NhAndNh2NcFusionPattern == nullptr) {
      // MS_LOG(ERROR) << "new %s failed", kNc2NhAndNh2NcFusionPattern);
      MS_LOG(ERROR) << "new " << kNc2NhAndNh2NcFusionPattern << "failed";
      return RET_ERROR;
    }
    nc2NhAndNh2NcFusionPattern->AddPatternOp(nc2nhOp);
@@ -52,7 +50,6 @@ STATUS FormatTransFusionPass::DefinePattern() {
    nc2NhAndNh2NcFusionPattern->Finish();
    this->patterns.emplace_back(nc2NhAndNh2NcFusionPattern.release());
  }
  // nchw2nhwc + QuantDtypeCast + nhwc2nchw
  {
    auto nc2nhOp = std::make_shared<PatternOp>();
    nc2nhOp->id = kFormatTransNc2NhOp;
@@ -68,7 +65,7 @@ STATUS FormatTransFusionPass::DefinePattern() {
    nh2ncOp->left = passOp;
    std::unique_ptr<FusionPattern> nc2NhAndNh2NcPassFusionPattern(new FusionPattern(kNc2NhAndNh2NcPassFusionPattern));
    if (nc2NhAndNh2NcPassFusionPattern == nullptr) {
      // MS_LOG(ERROR) << "new %s failed", kNc2NhAndNh2NcPassFusionPattern);
      MS_LOG(ERROR) << "new " << kNc2NhAndNh2NcPassFusionPattern << "failed";
      return RET_ERROR;
    }
    nc2NhAndNh2NcPassFusionPattern->AddPatternOp(nc2nhOp);
@@ -90,7 +87,7 @@ STATUS FormatTransFusionPass::DefinePattern() {
    std::unique_ptr<FusionPattern> nh2NcAndNc2NhFusionPattern(new (std::nothrow)
                                                                FusionPattern(kNh2NcAndNc2NhFusionPattern));
    if (nh2NcAndNc2NhFusionPattern == nullptr) {
      // MS_LOG(ERROR) << "new %s failed", kNh2NcAndNc2NhFusionPattern);
      MS_LOG(ERROR) << "new " << kNh2NcAndNc2NhFusionPattern << "failed";
      return RET_ERROR;
    }
    nh2NcAndNc2NhFusionPattern->AddPatternOp(nh2ncOp);
--- a/mindspore/lite/tools/converter/legacy_optimizer/fusion/fusion_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/fusion/fusion_pass.cc
@@ -247,7 +247,7 @@ bool FusionPass::MatchTree(schema::MetaGraphT *graph, size_t nodeIdx, const std:
  // path is setted and not pointer to this node
  if (target->pathSetted) {
    MS_ASSERT(target->path != nullptr);
    if (target->path->nodeIdx != nodeIdx) {
    if (target->path->nodeIdx != static_cast<int>(nodeIdx)) {
      return false;
    }
  }
--- a/mindspore/lite/tools/converter/legacy_optimizer/fusion/matmul_biasadd_fusion_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/fusion/matmul_biasadd_fusion_pass.cc
@@ -108,7 +108,6 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
  transA = matMulNode->primitive->value.AsMatMul()->transposeA;
  transB = matMulNode->primitive->value.AsMatMul()->transposeB;
  MS_ASSERT(matMulNode->primitive->value.value != nullptr);
  delete (matMulNode->primitive->value.value);
  matMulNode->primitive->value.type = schema::PrimitiveType_FullConnection;
  matMulNode->primitive->value.value = fcAttr.release();

@@ -135,11 +134,6 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p
 STATUS MatMulBiasAddFusionPass::InsertTransposeNode(MetaGraphT *graph, const std::shared_ptr<Path> &matMulPath) {
  MS_ASSERT(graph != nullptr);
  MS_ASSERT(matMulPath != nullptr);
  auto &matMulNode = graph->nodes.at(matMulPath->nodeIdx);
  MS_ASSERT(graph->allTensors.size() > matMulNode->inputIndex.at(0));
  MS_ASSERT(graph->allTensors.size() > matMulNode->inputIndex.at(2));
  const auto &tensorA = graph->allTensors.at(matMulNode->inputIndex.at(0));
  const auto &tensorB = graph->allTensors.at(matMulNode->inputIndex.at(1));

  std::vector<size_t> insertNodeIdxList;
  if (transA) {