!4387 fix coding specification according to cppcheck report

Merge pull request !4387 from 徐安越/master
5 years ago · 05b03fe017
--- a/mindspore/lite/src/ops/power.cc
+++ b/mindspore/lite/src/ops/power.cc
@@ -31,7 +31,7 @@ int Power::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::
  }
  auto output_tensor = outputs[0];
  MS_ASSERT(output_tensor != nullptr);
  if (exp_tensor) {
  if (exp_tensor != nullptr) {
    if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) {
      MS_LOG(ERROR) << "Power inputs shape or type is not equal!";
      return RET_INPUT_TENSOR_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
@@ -48,7 +48,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso

    case kNumberTypeFloat32: {
      kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive);
      if (!kernel) {
      if (kernel == nullptr) {
        MS_LOG(ERROR) << "kernel is nullptr.";
        return nullptr;
      }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
@@ -97,8 +97,8 @@ int Convolution3x3FP16CPUKernel::InitWeightBias() {
 }

 int Convolution3x3FP16CPUKernel::InitTmpBuffer() {
  int tile_num = 16;
  int k_plane = 36;
  const int tile_num = 16;
  const int k_plane = 36;
  int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
  int oC8 = UP_DIV(conv_param_->output_channel_, C8NUM);

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
@@ -261,7 +261,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten
  conv_param->input_w_ = inputs.front()->Width();
  conv_param->output_h_ = outputs.front()->Height();
  conv_param->output_w_ = outputs.front()->Width();
  bool use_winograd;
  bool use_winograd = false;
  int out_unit;
  InputTransformUnitFunc input_trans_func = nullptr;
  OutputTransformUnitFunc output_trans_func = nullptr;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc
@@ -61,7 +61,7 @@ int Convolution3x3CPUKernel::InitWeightBias() {
  oc_block = C8NUM;
  oc_block_num = UP_DIV(output_channel, C8NUM);
 #endif
  int k_plane = 16;
  const int k_plane = 16;
  // init weight
  size_t transformed_size = iC4 * C4NUM * oc_block_num * oc_block * k_plane * sizeof(float);
  transformed_filter_addr_ = reinterpret_cast<float *>(malloc(transformed_size));
@@ -93,7 +93,7 @@ int Convolution3x3CPUKernel::InitWeightBias() {
 int Convolution3x3CPUKernel::InitTmpBuffer() {
  int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
  int oC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
  int k_plane = 16;
  const int k_plane = 16;

  /*=============================tile_buffer_============================*/
  size_t tile_buffer_size = thread_count_ * TILE_NUM * k_plane * iC4 * C4NUM * sizeof(float);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc
@@ -52,6 +52,10 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int
  std::vector<int> strides = trans_weight->GetStride();

  int kernel_plane_stride = channel_in;
  if (oc_block == 0) {
    MS_LOG(ERROR) << "Divide by zero";
    return;
  }
  for (int i = 0; i < channel_out; i++) {
    int out_c_block = i / oc_block;
    int out_c_res = i % oc_block;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm.cc
@@ -84,7 +84,7 @@ int LstmCPUKernel::InitWeightBias() {
  }

  auto bias_data = reinterpret_cast<float *>(in_tensors_.at(3)->Data());
  int state_bias_offset = 4 * lstm_parm_->hidden_size_;
  const int state_bias_offset = 4 * lstm_parm_->hidden_size_;
  for (int i = 0; i < state_bias_offset; i++) {
    bias_ptr_[i] = bias_data[i] + bias_data[i + state_bias_offset];
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc
@@ -66,7 +66,7 @@ int PowerCPUKernel::RunImpl(int task_id) {
    exp_addr = reinterpret_cast<float *>(in_tensors_[1]->Data());
    broadcast = false;
  }
  float *cur_exp;
  float *cur_exp = nullptr;
  if (broadcast) {
    cur_exp = &power_;
  } else {
--- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
@@ -161,7 +161,7 @@ int SqueezeInt8CPUKernel::Run() {
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: ";
  }
  return RET_OK;
  return ret;
 }

 int SqueezeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.c
@@ -35,7 +35,7 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
 void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weight, float16_t *bias, size_t step,
                           size_t ic4, size_t out_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
                           size_t relu6) {
  int tile_n = 16;
  const int tile_n = 16;
  for (int i = 0; i < out_channel; i++) {
    int oc8_block = i / 8;
    int oc8_res = i % 8;
@@ -76,7 +76,7 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
 void IndirectGemmFp16_16x8_tmp(float16_t *output, float16_t *input, float16_t *weight, const float16_t *bias,
                               size_t step, size_t ic4, size_t output_channel, size_t offset, size_t mode,
                               size_t writeC4, size_t relu, size_t relu6) {
  int tile_num = 16;
  const int tile_num = 16;
  if (mode) {
    for (int i = 0; i < tile_num; i++) {
      int input_tile_offset = i * C4NUM;
@@ -175,8 +175,8 @@ void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16
  // todo
  int thread_count = conv_param->thread_num_;
  int tile_num = 16;
  int output_unit = 4;
  int k_plane = 36;
  const int output_unit = 4;
  const int k_plane = 36;
  int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
  int oc8 = UP_DIV(conv_param->output_channel_, C8NUM);

--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c
@@ -190,14 +190,16 @@ void Conv3x3Fp16InputUnit(float16_t *tmp_data, float16_t *trans_input_data, size
 void Conv3x3Fp16InputTransform(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data,
                               int start_index, int real_cal_num, int out_w_block, ConvParameter *conv_param) {
  // input data format : nhwc
  int output_unit = 4;
  const int output_unit = 4;
  int input_channel = conv_param->input_channel_;
  int input_width = conv_param->input_w_;
  int input_height = conv_param->input_h_;
  int pad_w = conv_param->pad_w_;
  int pad_h = conv_param->pad_h_;
  int ic4 = UP_DIV(input_channel, C4NUM);

  if (out_w_block == 0) {
    return;
  }
  for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
    int x_id = start_index + cal_id;
    int origin_x = (x_id % out_w_block) * output_unit - pad_w;
@@ -511,7 +513,9 @@ void Conv3x3Fp16OutputTransform(const float16_t *gemm_out, float16_t *out_data,
  int output_h = conv_param->output_h_;
  int out_h_block = UP_DIV(output_h, C4NUM);
  int oc8 = UP_DIV(output_channel, C8NUM);

  if (out_w_block == 0) {
    return;
  }
  for (int i = 0; i < real_cal_num; i++) {
    int out_w_index = (start_index + i) % out_w_block;
    int out_h_index = (start_index + i) / out_w_block;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
@@ -65,6 +65,9 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr

 void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_ptr, size_t output_channel,
                      size_t plane_size, size_t stride, bool is_relu, bool is_relu6, int size) {
  if (size == 0) {
    return;
  }
  for (int oc = 0; oc < output_channel; oc++) {
    int oc_div = oc / size, oc_mod = oc % size;
    for (int hw = 0; hw < plane_size; hw++) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.c
@@ -142,7 +142,7 @@ void ConvSWFp32(const float *input_data, const float *packed_weight, const float
  int ic4 = slidingWindow_param->ic4_channel_ / C4NUM;
  int oc4_res = conv_param->output_channel_ % C4NUM;
  const float *src = input_data;
  float *dst;
  float *dst = NULL;
  if (oc4_res == 0) {
    dst = output_data;
  } else {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.c
@@ -328,36 +328,36 @@ void ConvDw3x3Fp32FilterTrans(float *trans_weight, float *weight, int oc4) {
      float dst01 = (local_ptr + 4)[0];
      float dst02 = (local_ptr + 8)[0];

      float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
      float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
      float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
      const float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
      const float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
      const float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

      float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
      float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
      float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
      const float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
      const float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
      const float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

      float dst30 = (local_ptr + 24)[0];
      float dst31 = (local_ptr + 28)[0];
      float dst32 = (local_ptr + 32)[0];

      float m00 = dst00;
      float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
      float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
      const float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
      const float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
      float m03 = dst02;

      float m10 = dst10;
      float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
      float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
      const float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
      const float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
      float m13 = dst12;

      float m20 = dst20;
      float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
      float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
      const float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
      const float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
      float m23 = dst22;

      float m30 = dst30;
      float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
      float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
      const float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
      const float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
      float m33 = dst32;

      *(dst + j) = m00;
@@ -387,7 +387,7 @@ void ConvDw3x3Fp32FilterTrans(float *trans_weight, float *weight, int oc4) {
 void ConvDw3x3Fp32InputTrans(const float *input_data, float *trans_input, float *block_buffer, int out_h_block,
                             int out_w_block, const ConvParameter *conv_param) {
  int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
  int input_unit = 4;
  const int input_unit = 4;
  memset(trans_input, 0, out_h_block * out_h_block * 16 * C4NUM * sizeof(float));

  for (int oh = 0; oh < out_h_block; oh++) {
@@ -426,7 +426,7 @@ void ConvDw3x3Fp32InputTrans(const float *input_data, float *trans_input, float

 // todo yangruoqi: implement assembly
 void ConvDw3x3Fp32Winograd(float *trans_buffer, const float *weight, int out_h_block, int out_w_block) {
  int unit = 4;
  const int unit = 4;
  for (int oh = 0; oh < out_h_block; oh++) {
    float *buf_oh = trans_buffer + oh * out_w_block * 16 * C4NUM;
    for (int ow = 0; ow < out_w_block; ow++) {
@@ -583,7 +583,7 @@ void ConvDw3x3Fp32OutputTrans(float *trans_buffer, float *output_data, const flo
  int oc4 = UP_DIV(conv_param->output_channel_, C4NUM);
  bool h_in_range = true;
  for (int oh = 0; oh < out_h_block; oh++) {
    int real_oh = 2 * oh;
    const int real_oh = 2 * oh;
    if ((oh + 1) * 2 > conv_param->output_h_) {
      h_in_range = false;
    }
@@ -592,7 +592,7 @@ void ConvDw3x3Fp32OutputTrans(float *trans_buffer, float *output_data, const flo
    float *output_oh = output_data + real_oh * conv_param->output_w_ * oc4 * C4NUM;

    for (int ow = 0; ow < out_w_block; ow++) {
      int real_ow = 2 * ow;
      const int real_ow = 2 * ow;
      if ((ow + 1) * 2 > conv_param->output_w_) {
        w_in_range = false;
      }
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/resize.c
@@ -47,13 +47,13 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input
      int y_bottom = (int)(floor(actual_y));
      int y_top = y_bottom + 1 < in_h ? (y_bottom + 1) : (in_h - 1);
      float y_top_weight = actual_y - (float)(y_bottom);
      float y_bottom_weight = 1.0f - y_top_weight;
      const float y_bottom_weight = 1.0f - y_top_weight;
      for (w = 0; w < new_width; w++) {
        float actual_x = (float)(w)*width_scale;
        int x_left = (int)(floor(actual_x));
        int x_right = x_left + 1 < in_w ? (x_left + 1) : (in_w - 1);
        float x_right_weight = actual_x - (float)(x_left);
        float x_left_weight = 1.0f - x_right_weight;
        const float x_left_weight = 1.0f - x_right_weight;
        c = 0;
 #ifdef ENABLE_NEON
        for (; c <= in_c - 4; c += 4) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/roi_pooling.c
@@ -30,7 +30,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, const int *in_shape, c
  int pooled_width = param->pooledW_;
  int in_stride[DIMENSION_4D];
  int out_stride[DIMENSION_4D];
  int roi_stride = 5;
  const int roi_stride = 5;
  in_stride[DIMENSION_4D - 1] = 1;
  out_stride[DIMENSION_4D - 1] = 1;
  for (int i = dim - 2; i >= 0; --i) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/space_to_batch.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/space_to_batch.c
@@ -138,7 +138,7 @@ void DoPadding(const float *input, float *padded_input, SpaceToBatchParameter pa
 }

 int SpaceToBatch(const float *input, float *output, SpaceToBatchParameter param, float *tmp_space[3]) {
  float *padded_input;
  float *padded_input = NULL;
  int ret;
  if (param.need_paddings_) {
    if (tmp_space[0] == NULL || tmp_space[1] == NULL || tmp_space[2] == NULL) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32_grad/pooling_grad.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32_grad/pooling_grad.c
@@ -30,7 +30,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
  int output_h = pooling_param->output_h_;
  int output_batch = pooling_param->output_batch_;

  const float *inPtr;
  const float *inPtr = NULL;
  for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;

  // int pad_top = padding[2];
@@ -119,7 +119,7 @@ void MaxPoolingGrad(const float *dy, const int *indices, float *output_ptr, Pool

  const float *yt = (const float *)(dy);
  const int *pos = (const int *)(indices);
  float *out;
  float *out = NULL;

  if (1) {  // grads->layout() == Tensor::nhwc)
    for (int ib = 0; ib < output_batch; ib++) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/arg_min_max_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/arg_min_max_int8.c
@@ -34,7 +34,7 @@ void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_co
 void DoArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count,
                      int axis_count, int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  for (int i = 0; i < pre_axis_count; ++i) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batch_to_space_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batch_to_space_int8.c
@@ -28,7 +28,7 @@ void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const in
  size_t output_offset = 0;
  size_t in_stride_h = in_w * in_c;
  size_t in_stride_n = in_stride_h * in_h;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;
@@ -76,7 +76,7 @@ void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_
  size_t in_stride_h = in_w * in_c;
  size_t in_stride_n = in_stride_h * in_h;

  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/concat_int8.c
@@ -20,7 +20,7 @@

 void Int8Concat(int8_t **inputs, int8_t *output, ConcatParameter *para, int axis, int64_t real_dst_count, int task_id) {
  float output_scale = para->quant_arg_.out_args_.scale_;
  float output_inverse_scale = 1.f / output_scale;
  const float output_inverse_scale = 1.f / output_scale;
  int input_num = para->input_num_;
  int count_unit_ = para->count_unit_;
  int after_axis_size = para->after_axis_size;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
@@ -201,7 +201,7 @@ void Conv3x3Uint8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, i
 #ifdef ENABLE_ARM
  IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t));
 #else
  int input_unit_square = 16;
  const int input_unit_square = 16;
  for (int c = 0; c < oc4; c++) {
    int filter_oc_offset = c * input_unit_square * ic8 * C8NUM * C4NUM;
    int dst_oc_offset = c * input_unit_square * C4NUM;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/depth_to_space_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/depth_to_space_int8.c
@@ -22,7 +22,7 @@ void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape,
  int32_t in_shape_dim2 = in_shape[2];
  int32_t in_shape_dim1 = in_shape[1];
  size_t copy_size = block_size * param->out_stride_dim2_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/pooling_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/pooling_int8.c
@@ -36,8 +36,8 @@ void AvgPoolingInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParamete
  float output_scale = pooling_param->quant_args_[1][0].scale_;
  int output_zp = pooling_param->quant_args_[1][0].zp_;
  double real_multiplier = input_scale / output_scale;
  int8_t out_min = INT8_MIN;
  int8_t out_max = INT8_MAX;
  const int8_t out_min = INT8_MIN;
  const int8_t out_max = INT8_MAX;

  for (int batch = 0; batch < output_batch; batch++) {
    int in_batch_offset = batch * in_h * in_w * channel;
@@ -91,8 +91,8 @@ void AvgPoolingOptInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParam
  int out_tile_count = UP_DIV(out_plane, TILE_NUM);
  int thread_num = pooling_param->thread_num_;
  int c8 = UP_DIV(channel, C8NUM);
  int8_t out_min = INT8_MIN;
  int8_t out_max = INT8_MAX;
  const int8_t out_min = INT8_MIN;
  const int8_t out_max = INT8_MAX;

  for (int batch = 0; batch < output_batch; batch++) {
    int in_batch_offset = batch * in_h * in_w * channel;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/prelu_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/prelu_int8.c
@@ -20,7 +20,7 @@
 void prelu(int8_t *inputs, int8_t *output_ptr, PreluParameter *quant_prelu_parm, int task_id) {
  float output_scale = quant_prelu_parm->quant_arg.out_args_.scale_;
  int output_zp = quant_prelu_parm->quant_arg.out_args_.zp_;
  float output_inverse_scale = 1.f / output_scale;
  const float output_inverse_scale = 1.f / output_scale;
  int output_dim = quant_prelu_parm->input_dim_;

  QuantArg *input_quant = NULL;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/reshape_int8.c
@@ -22,7 +22,7 @@ void Int8Reshape(int8_t *input_ptr, int8_t *output_ptr, int64_t real_dst_count,
  if (para.in_args_.scale_ == para.out_args_.scale_ && para.in_args_.zp_ == para.out_args_.zp_) {
    memcpy(output_ptr, input_ptr, real_dst_count);
  } else {
    float output_inverse_scale = 1.f / para.out_args_.scale_;
    const float output_inverse_scale = 1.f / para.out_args_.scale_;
    float scale = para.in_args_.scale_ * output_inverse_scale;
    float bias = -para.in_args_.zp_ * scale;
    int32_t output_zp = para.out_args_.zp_;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/resize.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/resize.c
@@ -115,6 +115,9 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
 }

 void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
  if (out_value == 0) {
    return;
  }
  *scale = (in_value * (1 << 10) + out_value / 2) / out_value;
  if (align_corners && out_value > 1) {
    *scale = ((in_value - 1) * (1 << 10) + (out_value - 1) / 2) / (out_value - 1);
@@ -133,6 +136,9 @@ void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int3

 void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
                               int32_t *nearest) {
  if (new_size == 0) {
    return;
  }
  *nearest = (in_size * pos) / new_size;
  if (align_corners) {
    *nearest = ((in_size - 1) * pos + (new_size - 1) / 2) / (new_size - 1);
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/squeeze_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/squeeze_int8.c
@@ -20,11 +20,11 @@
 void Squeeze(int8_t **inputs, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
             SqueezeParameter *para_, size_t osize) {
  float output_scale = quant_Squeeze_parm->out_quant_args_.scale_;
  float output_inverse_scale = 1.f / output_scale;
  const float output_inverse_scale = 1.f / output_scale;
  QuantArg *input_quant = quant_Squeeze_parm->in_quant_args_;
  int output_zp = quant_Squeeze_parm->out_quant_args_.zp_;

  int i = 0;
  const int i = 0;
  int8_t *input_ptr = inputs[0];
  for (int j = task_id; j < osize; j += para_->op_parameter_.thread_num_) {
    float scale = input_quant[i].scale_ * output_inverse_scale;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/pack.c
@@ -21,6 +21,9 @@
 void PackWeightFp32(float *weight_data, ConvParameter *conv_param, float *packed_weight, int oc_block,
                    int oc_block_num) {
  // original weight format : ohwi
  if (oc_block_num == 0) {
    return;
  }
  int kernel_h = conv_param->kernel_h_;
  int kernel_w = conv_param->kernel_w_;
  int in_channel = conv_param->input_channel_;
@@ -30,7 +33,7 @@ void PackWeightFp32(float *weight_data, ConvParameter *conv_param, float *packed
  int pack_weight_size = oc_block * oc_block_num * ic4 * C4NUM * kernel_plane;

  int unit_size = oc_block * C4NUM;
  int block_size = pack_weight_size / oc_block_num;
  const int block_size = pack_weight_size / oc_block_num;

  for (int m = 0; m < kernel_plane; m++) {
    int kernel_plane_stride = m * in_channel;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/prior_box.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/prior_box.c
@@ -19,6 +19,9 @@
 #include "nnacl/prior_box.h"

 int PriorBox(const float *input_data, float *output_data, const size_t size, const int tid, const int thread_num) {
  if (thread_num == 0) {
    return NNACL_ERR;
  }
  size_t unit_size = size / thread_num;
  if (tid == thread_num - 1) {
    size_t tail_size = size - unit_size * tid;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/unique.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/unique.c
@@ -26,7 +26,7 @@ int Find(float *array, int len, float target) {
 }

 void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1) {
  output0_len = 0;
  *output0_len = 0;
  for (int i = 0; i < input_len; i++) {
    int idx = Find(output0, *output0_len, input[i]);
    if (idx != -1) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_transform.c
@@ -28,7 +28,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
  int pad_w = conv_param->pad_w_;
  int input_h = conv_param->input_h_;
  int input_w = conv_param->input_w_;

  if (out_w_block_num == 0) {
    return;
  }
  for (int c = 0; c < cal_num; c++) {  // actual tiled number
    int src_x_s = (out_tile_index % out_w_block_num) * output_unit - pad_w;
    int src_y_s = (out_tile_index / out_w_block_num) * output_unit - pad_h;
@@ -83,7 +85,9 @@ void WinogradOutputTransform(const float *gemm_out, float *tmp_out_data, const f
  int output_channel = conv_param->output_channel_;
  int oc4 = UP_DIV(output_channel, C4NUM);
  int input_unit = conv_param->input_unit_;

  if (output_unit_num == 0) {
    return;
  }
  for (int i = 0; i < cal_num; i++) {
    int dst_x_s = out_tile_index % output_unit_num;
    int dst_y_s = out_tile_index / output_unit_num;
@@ -281,7 +285,9 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa
  int pad_h = conv_param->pad_h_;
  int ic4 = UP_DIV(input_channel, C4NUM);
  int input_unit = 4;

  if (out_w_block == 0) {
    return;
  }
  for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
    int x_id = start_index + cal_id;
    int origin_x = (x_id % out_w_block) * OUPUT_UNIT - pad_w;
@@ -328,8 +334,11 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa

 void Conv3x3Fp32FilterTransform(float *weight_data, float *trans_weight, int iC4, int output_channel, int kernel_plane,
                                int oc_block) {
  int input_unit = 4;
  const int input_unit = 4;
  int dst_step = iC4 * C4NUM * oc_block;
  if (oc_block == 0) {
    return;
  }
  for (int o = 0; o < output_channel; o++) {
    int oc_block_num = o / oc_block;
    int oc_block_rem = o % oc_block;
@@ -485,36 +494,36 @@ void Conv3x3Fp32FilterTransform(float *weight_data, float *trans_weight, int iC4
        float dst01 = (local_ptr + 4)[0];
        float dst02 = (local_ptr + 8)[0];

        float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
        float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
        float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
        const float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
        const float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
        const float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

        float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
        float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
        float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
        const float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
        const float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
        const float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

        float dst30 = (local_ptr + 24)[0];
        float dst31 = (local_ptr + 28)[0];
        float dst32 = (local_ptr + 32)[0];

        float m00 = dst00;
        float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
        float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
        const float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
        const float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
        float m03 = dst02;

        float m10 = dst10;
        float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
        float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
        const float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
        const float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
        float m13 = dst12;

        float m20 = dst20;
        float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
        float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
        const float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
        const float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
        float m23 = dst22;

        float m30 = dst30;
        float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
        float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
        const float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
        const float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
        float m33 = dst32;

        *(dst_ic4_ptr + j * 8) = m00;
@@ -652,8 +661,10 @@ void Conv3x3Fp32OutputTransform(const float *gemm_out, float *out_data, const fl
  int output_w = conv_param->output_w_;
  int output_h = conv_param->output_h_;
  int oc4 = UP_DIV(output_channel, C4NUM);
  int input_unit = 4;

  const int input_unit = 4;
  if (out_w_block == 0) {
    return;
  }
  for (int i = 0; i < real_cal_num; i++) {
    int out_w_index = (start_index + i) % out_w_block;
    int out_h_index = (start_index + i) / out_w_block;
@@ -855,9 +866,11 @@ void Conv3x3Uint8InputTransform(const int16_t *input_data, int16_t *trans_input,
  int pad_h = conv_param->pad_h_;
  ConvQuantArg quant_arg = conv_param->conv_quant_arg_;
  int input_zp = quant_arg.input_quant_args_[0].zp_;
  int ic8 = UP_DIV(input_channel, C8NUM);
  int input_unit = 4;

  const int ic8 = UP_DIV(input_channel, C8NUM);
  const int input_unit = 4;
  if (out_w_block == 0) {
    return;
  }
  for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
    int x_id = start_index + cal_id;
    int origin_x = (x_id % out_w_block) * OUPUT_UNIT - pad_w;
@@ -890,7 +903,7 @@ void Conv3x3Uint8InputTransform(const int16_t *input_data, int16_t *trans_input,

 void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weight, int iC8, int output_channel,
                                int kernel_plane) {
  int input_unit = 4;
  const int input_unit = 4;
  int dst_step = iC8 * C8NUM * C4NUM;
  for (int o = 0; o < output_channel; o++) {
    int oc4_block_num = o / C4NUM;
@@ -1441,9 +1454,11 @@ void Conv3x3Uint8OutputTransform(const int32_t *gemm_out, int8_t *out_data, cons
  int output_channel = conv_param->output_channel_;
  int output_w = conv_param->output_w_;
  int output_h = conv_param->output_h_;
  int oc4 = UP_DIV(output_channel, C4NUM);
  int input_unit = 4;

  const int oc4 = UP_DIV(output_channel, C4NUM);
  const int input_unit = 4;
  if (out_w_block == 0) {
    return;
  }
  for (int i = 0; i < real_cal_num; i++) {
    int out_w_index = (start_index + i) % out_w_block;
    int out_h_index = (start_index + i) / out_w_block;
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_utils.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/winograd_utils.c
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
@@ -49,6 +49,9 @@ __kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t in

  float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
  float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
  if (b == 0) {
    return;
  }
  write_imagef(output, (int2)(X, Y), a / b);
 }

--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
@@ -249,6 +249,10 @@ int ConvolutionOpenCLKernel::GetGlobalLocal(std::vector<size_t> *global, std::ve
  size_t global_c = UP_DIV(UP_DIV(param->output_channel_, C4NUM), work_group_size[2]) * work_group_size[2];

  size_t local_c = GetBiggestDivider(global_c, max_z_size);
  if (local_c == 0) {
    MS_LOG(ERROR) << "Divide by zero";
    return RET_ERROR;
  }
  size_t local_hw_size = std::min<size_t>(256, max_work_group_size) / local_c;
  size_t local_w = std::min(global_w, local_hw_size);
  size_t local_h = std::min(local_hw_size / local_w, global_h);
--- a/mindspore/lite/src/runtime/kernel/opencl/utils.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/utils.cc
@@ -32,6 +32,10 @@ std::vector<size_t> GetCommonGlobalSize(const std::vector<size_t> &local, const

 std::vector<size_t> GetCommonLocalSize(const std::vector<size_t> &global, int max_size) {
  size_t wg_z = GetBiggestDividerWithPriority(global[2], 8);
  if (wg_z == 0) {
    MS_LOG(ERROR) << "Divide by zero";
    return {};
  }
  size_t wg_xy_size = max_size / wg_z;
  size_t wg_x = std::min(DivideRoundUp(global[0], 2), wg_xy_size);
  size_t wg_y = std::min(wg_xy_size / wg_x, global[1]);
--- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
@@ -130,6 +130,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size)
  if (ret != CL_SUCCESS) {
    MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
    UnLock();
    delete buffer;
    return nullptr;
  }
  device_ptr = static_cast<void *>(buffer);
@@ -138,6 +139,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size)
  if (host_ptr == nullptr) {
    MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
    UnLock();

    return nullptr;
  }
  cl::Memory *mem = buffer;
@@ -187,6 +189,7 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v
  if (ret != CL_SUCCESS) {
    MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
    UnLock();
    delete buffer;
    return nullptr;
  }
  device_ptr = static_cast<void *>(buffer);
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -195,7 +195,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
  }

  desc.arch = kernel::KERNEL_ARCH::kCPU;
  kernel::LiteKernel *kernel;
  kernel::LiteKernel *kernel = nullptr;
  if (data_type == kNumberTypeFloat32) {
    // check if support fp16
    kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};
--- a/mindspore/lite/src/train/import.hpp
+++ b/mindspore/lite/src/train/import.hpp
@@ -25,6 +25,10 @@ std::shared_ptr<ModelImpl> Import(const char *model_buf, size_t size) {
    return nullptr;
  }
  // todo hangangqiang remove when copy primitive done
  if (size <= 0) {
    MS_LOG(ERROR) << "size is zero";
    return nullptr;
  }
  auto *inner_buf = new char[size];
  memcpy(inner_buf, model_buf, size);
  auto meta_graph = schema::GetMetaGraph(inner_buf);
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -462,7 +462,7 @@ void BenchmarkFlags::InitInputDataList() {
  char *cur_input;
  const char *split_c = ",";
  cur_input = strtok(input_list, split_c);
  while (cur_input) {
  while (cur_input != nullptr) {
    input_data_list.emplace_back(cur_input);
    cur_input = strtok(nullptr, split_c);
  }
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_arithmetic_operation_parser.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_arithmetic_operation_parser.cc
@@ -117,9 +117,9 @@ STATUS OnnxEltwiseParser::Parse(const onnx::GraphProto &onnx_graph,
  std::unique_ptr<schema::EltwiseT> attr(new schema::EltwiseT());
  if (onnx_node.op_type() == "Prod") {
    attr->mode = schema::EltwiseMode_PROD;
  } else if (onnx_node.op_type() == "Prod") {
    attr->mode = schema::EltwiseMode_SUM;
  } else if (onnx_node.op_type() == "Sum") {
    attr->mode = schema::EltwiseMode_SUM;
  } else if (onnx_node.op_type() == "Maximum") {
    attr->mode = schema::EltwiseMode_MAXIMUM;
  }

--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
@@ -166,6 +166,7 @@ const void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kerne
  auto data_size = kernel_num * kernel_size * sizeof(float);
  if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
    MS_LOG(EXCEPTION) << "memset newWeightData failed";
    delete[] tmp_weight_data;
    return;
  }