!9127 [MSLITE][Develop] fix code review

From: @sunsuodong Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tong
5 years ago · a1a2a957cd
--- a/mindspore/lite/nnacl/arg_min_max.c
+++ b/mindspore/lite/nnacl/arg_min_max.c
@@ -33,7 +33,7 @@ void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis
  }
 }

 void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMaxParameter *param) {
 void ArgMinMaxTopk1(const void *input, void *output, const int *shape, const ArgMinMaxParameter *param) {
  int pre_axis_count = 1;
  int axis_count = 1;
  int after_axis_count = 1;
@@ -48,7 +48,7 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax
  }
 }

 void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->get_max_) {
    switch (param->axis_) {
      case 0:
@@ -82,7 +82,7 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->topk_ == 1) {
    ArgMinMaxTopk1(input, output, in_shape, param);
    return;
--- a/mindspore/lite/nnacl/arg_min_max.h
+++ b/mindspore/lite/nnacl/arg_min_max.h
@@ -21,7 +21,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/arithmetic_common.c
+++ b/mindspore/lite/nnacl/arithmetic_common.c
@@ -17,8 +17,8 @@
 #include "nnacl/arithmetic_common.h"
 #include "nnacl/nnacl_utils.h"

 void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides,
                      int *outStrides, int *multiple) {
 void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape,
                      const int *inStrides, const int *outStrides, const int *multiple) {
  int srcDimSize = inShape[dim];
  if (dim == ndim - 1) {
    for (int i = 0; i < multiple[dim]; i++) {
@@ -35,8 +35,8 @@ void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *
  }
 }

 void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides,
                           int *outStrides, int *multiple) {
 void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape,
                           const int *inStrides, const int *outStrides, const int *multiple) {
  int srcDimSize = inShape[dim];
  if (dim == ndim - 1) {
    for (int i = 0; i < multiple[dim]; i++) {
@@ -74,7 +74,8 @@ void CalcMultiplesAndStrides(ArithmeticParameter *param) {
  ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_);
 }

 void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param) {
 void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
                    ArithmeticParameter *param) {
  CalcMultiplesAndStrides(param);
  TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_,
                   param->multiples0_);
@@ -82,7 +83,7 @@ void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_d
                   param->multiples1_);
 }

 void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
 void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
                         ArithmeticParameter *param) {
  CalcMultiplesAndStrides(param);
  TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_,
@@ -91,7 +92,7 @@ void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, ui
                        param->multiples1_);
 }

 void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
 void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
                        ArithmeticParameter *param) {
  CalcMultiplesAndStrides(param);
  TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_,
--- a/mindspore/lite/nnacl/arithmetic_common.h
+++ b/mindspore/lite/nnacl/arithmetic_common.h
@@ -47,18 +47,19 @@ typedef struct ArithmeticParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides,
                      int *outStrides, int *multiple);
 void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape,
                      const int *inStrides, const int *outStrides, const int *multiple);
 void ComputeStrides(const int *shape, int *strides, const int ndim);

 void CalcMultiplesAndStrides(ArithmeticParameter *param);

 void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides,
                           int *outStrides, int *multiple);
 void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param);
 void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
 void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape,
                           const int *inStrides, const int *outStrides, const int *multiple);
 void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
                    ArithmeticParameter *param);
 void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
                         ArithmeticParameter *param);
 void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
 void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
                        ArithmeticParameter *param);
 #ifdef __cplusplus
 }
--- a/mindspore/lite/nnacl/flatten.c
+++ b/mindspore/lite/nnacl/flatten.c
@@ -16,6 +16,6 @@
 #include "nnacl/flatten.h"
 #include <string.h>

 void Flatten(const void *input, void *output, FlattenParameter *flatten_param) {
 void Flatten(const void *input, void *output, const FlattenParameter *flatten_param) {
  memcpy(output, input, flatten_param->size);
 }
--- a/mindspore/lite/nnacl/flatten.h
+++ b/mindspore/lite/nnacl/flatten.h
@@ -25,7 +25,7 @@ typedef struct FlattenParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 void Flatten(const void *input, void *output, FlattenParameter *flatten_param);
 void Flatten(const void *input, void *output, const FlattenParameter *flatten_param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
+++ b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
@@ -43,7 +43,7 @@ int ArgCompareDescFp32(const void *a, const void *b) {
  return 0;
 }

 void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
@@ -58,7 +58,7 @@ void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
@@ -73,7 +73,7 @@ void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
@@ -88,7 +88,7 @@ void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
@@ -103,7 +103,7 @@ void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
@@ -123,7 +123,7 @@ void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
@@ -143,7 +143,7 @@ void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
@@ -163,7 +163,7 @@ void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
@@ -183,7 +183,7 @@ void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  for (int i = 0; i < in_shape[0]; ++i) {
@@ -208,7 +208,7 @@ void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  for (int i = 0; i < in_shape[0]; ++i) {
@@ -233,7 +233,7 @@ void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  for (int i = 0; i < in_shape[0]; ++i) {
@@ -258,7 +258,7 @@ void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  for (int i = 0; i < in_shape[0]; ++i) {
@@ -283,7 +283,7 @@ void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  int in_shape3 = in_shape[3];
@@ -311,7 +311,7 @@ void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  int in_shape3 = in_shape[3];
@@ -339,7 +339,7 @@ void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  int in_shape3 = in_shape[3];
@@ -367,7 +367,7 @@ void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  int in_shape3 = in_shape[3];
@@ -395,7 +395,7 @@ void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape,
  }
 }

 void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMaxDim0OutValue(input, output, in_shape, param);
  } else {
@@ -403,7 +403,7 @@ void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMinDim0OutValue(input, output, in_shape, param);
  } else {
@@ -411,7 +411,7 @@ void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMaxDim1OutValue(input, output, in_shape, param);
  } else {
@@ -419,7 +419,7 @@ void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMinDim1OutValue(input, output, in_shape, param);
  } else {
@@ -427,7 +427,7 @@ void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMaxDim2OutValue(input, output, in_shape, param);
  } else {
@@ -435,7 +435,7 @@ void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMinDim2OutValue(input, output, in_shape, param);
  } else {
@@ -443,7 +443,7 @@ void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMaxDim3OutValue(input, output, in_shape, param);
  } else {
@@ -451,7 +451,7 @@ void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
 void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
  if (param->out_value_) {
    ArgMinDim3OutValue(input, output, in_shape, param);
  } else {
@@ -459,7 +459,7 @@ void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMa
  }
 }

 void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
 void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count) {
  bool out_value = param->out_value_;
  for (int i = 0; i < pre_axis_count; ++i) {
@@ -480,7 +480,7 @@ void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pr
  }
 }

 void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
 void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count) {
  bool out_value = param->out_value_;
  for (int i = 0; i < pre_axis_count; ++i) {
--- a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
+++ b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
@@ -21,18 +21,18 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
 void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count);
 void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
 void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count);
 void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c
+++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c
@@ -615,8 +615,8 @@ int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const
  return NNACL_OK;
 }

 int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param) {
 int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementMul(tile_input0, tile_input1, output, element_size);
 }
@@ -690,21 +690,21 @@ int ElementAddInt(const int *input0, const int *input1, int *output, const int e
  return NNACL_OK;
 }

 int ElementAddInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
 int ElementAddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int element_size) {
  for (int i = 0; i < element_size; i++) {
    output[i] = input0[i] + input1[i];
  }
  return NNACL_OK;
 }

 int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param) {
 int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementAdd(tile_input0, tile_input1, output, element_size);
 }

 int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output,
                     int element_size, ArithmeticParameter *param) {
 int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1,
                     int8_t *output, int element_size, ArithmeticParameter *param) {
  TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param);
  return ElementAddInt8(tile_input0, tile_input1, output, element_size);
 }
@@ -763,8 +763,8 @@ int ElementSubRelu6(const float *input0, const float *input1, float *output, con
  return NNACL_OK;
 }

 int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param) {
 int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementSub(tile_input0, tile_input1, output, element_size);
 }
@@ -791,8 +791,8 @@ int ElementDivRelu6(const float *input0, const float *input1, float *output, con
  return NNACL_OK;
 }

 int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param) {
 int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementDiv(tile_input0, tile_input1, output, element_size);
 }
@@ -811,7 +811,7 @@ int ElementFloorModInt(const int *input0, const int *input1, int *output, const
  return NNACL_OK;
 }

 int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                      int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementFloorMod(tile_input0, tile_input1, output, element_size);
@@ -831,7 +831,7 @@ int ElementFloorDivInt(const int *input0, const int *input1, int *output, const
  return NNACL_OK;
 }

 int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                      int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementFloorDiv(tile_input0, tile_input1, output, element_size);
@@ -862,13 +862,13 @@ int ElementSquaredDifference(const float *input0, const float *input1, float *ou
  return ElementMul(output, output, output, element_size);
 }

 int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
                               int element_size, ArithmeticParameter *param) {
 int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
                               float *output, int element_size, ArithmeticParameter *param) {
  BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param);
  return ElementMul(output, output, output, element_size);
 }

 int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                        int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementLogicalAnd(tile_input0, tile_input1, output, element_size);
@@ -894,7 +894,7 @@ int ElementLogicalOr(const float *input0, const float *input1, float *output, co
  return NNACL_OK;
 }

 int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                       int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementLogicalOr(tile_input0, tile_input1, output, element_size);
@@ -916,7 +916,7 @@ int ElementMaximum(const float *input0, const float *input1, float *output, cons
  return NNACL_OK;
 }

 int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                     int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementMaximum(tile_input0, tile_input1, output, element_size);
@@ -938,7 +938,7 @@ int ElementMinimum(const float *input0, const float *input1, float *output, cons
  return NNACL_OK;
 }

 int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                     int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementMinimum(tile_input0, tile_input1, output, element_size);
@@ -970,7 +970,7 @@ int ElementNotEqual(const float *input0, const float *input1, float *output, con
  return NNACL_OK;
 }

 int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                      int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementNotEqual(tile_input0, tile_input1, output, element_size);
@@ -1002,7 +1002,7 @@ int ElementEqual(const float *input0, const float *input1, float *output, const
  return NNACL_OK;
 }

 int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                   int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementEqual(tile_input0, tile_input1, output, element_size);
@@ -1026,8 +1026,8 @@ int ElementLess(const float *input0, const float *input1, float *output, const i
  return NNACL_OK;
 }

 int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                  ArithmeticParameter *param) {
 int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                  int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementLess(tile_input0, tile_input1, output, element_size);
 }
@@ -1050,7 +1050,7 @@ int ElementLessEqual(const float *input0, const float *input1, float *output, co
  return NNACL_OK;
 }

 int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                       int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementLessEqual(tile_input0, tile_input1, output, element_size);
@@ -1074,7 +1074,7 @@ int ElementGreater(const float *input0, const float *input1, float *output, cons
  return NNACL_OK;
 }

 int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                     int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementGreater(tile_input0, tile_input1, output, element_size);
@@ -1098,8 +1098,8 @@ int ElementGreaterEqual(const float *input0, const float *input1, float *output,
  return NNACL_OK;
 }

 int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
                          int element_size, ArithmeticParameter *param) {
 int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
                          float *output, int element_size, ArithmeticParameter *param) {
  TileDimensions(input0, input1, tile_input0, tile_input1, param);
  return ElementGreaterEqual(tile_input0, tile_input1, output, element_size);
 }
@@ -1111,7 +1111,7 @@ int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *
                         int *in_datatype, int *out_datatype, OpParameter *param) {
  *out_format = in_format[0];
  *out_datatype = in_datatype[0];
  ArithmeticParameter *arithmetic_parameter = (ArithmeticParameter *)param;
  const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param;
  int ndim0 = dim_size[0];
  int ndim1 = dim_size[1];
  int *in_shape0 = in_shape[0];
--- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h
+++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h
@@ -64,85 +64,85 @@ int ElementMulRelu6(const float *input0, const float *input1, float *output, con
 int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size);
 int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size);
 int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size);
 int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param);
 int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param);

 int ElementAdd(const float *input0, const float *input1, float *output, const int element_size);
 int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size);
 int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size);
 int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size);
 int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param);
 int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output,
                     int element_size, ArithmeticParameter *param);
 int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param);
 int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1,
                     int8_t *output, int element_size, ArithmeticParameter *param);

 int ElementSub(const float *input0, const float *input1, float *output, const int element_size);
 int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size);
 int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param);
 int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param);

 int ElementDiv(const float *input0, const float *input1, float *output, const int element_size);
 int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size);
 int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                 ArithmeticParameter *param);
 int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                 int element_size, ArithmeticParameter *param);

 int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                        int element_size, ArithmeticParameter *param);

 int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                       int element_size, ArithmeticParameter *param);

 int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                     int element_size, ArithmeticParameter *param);

 int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                     int element_size, ArithmeticParameter *param);

 int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size);
 int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size);
 int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                      int element_size, ArithmeticParameter *param);

 int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size);
 int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size);
 int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                      int element_size, ArithmeticParameter *param);

 int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
                               int element_size, ArithmeticParameter *param);
 int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
                               float *output, int element_size, ArithmeticParameter *param);

 int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size);

 int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                      int element_size, ArithmeticParameter *param);

 int ElementEqual(const float *input0, const float *input1, float *output, const int element_size);

 int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                   int element_size, ArithmeticParameter *param);

 int ElementLess(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
                  ArithmeticParameter *param);
 int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                  int element_size, ArithmeticParameter *param);

 int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                       int element_size, ArithmeticParameter *param);

 int ElementGreater(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
 int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
                     int element_size, ArithmeticParameter *param);

 int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size);
 int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
                          int element_size, ArithmeticParameter *param);
 int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
                          float *output, int element_size, ArithmeticParameter *param);

 #ifdef ENABLE_NNACL_INFER_SHAPE
 int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
--- a/mindspore/lite/nnacl/fp32/batchnorm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/batchnorm_fp32.c
@@ -19,8 +19,8 @@
 #include "nnacl/batchnorm_parameter.h"
 #include "nnacl/op_base.h"

 void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
                   void *output) {
 void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param,
                   int task_id, void *output) {
  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
  int completed_units = task_id * units_per_thread;
  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
@@ -37,7 +37,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba
 }

 void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
                        const void *variance, BatchNormParameter *param, int task_id, void *output) {
                        const void *variance, const BatchNormParameter *param, int task_id, void *output) {
  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
  int completed_units = task_id * units_per_thread;
  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
@@ -53,7 +53,7 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset
  }
 }

 void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param,
 void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param,
                               float *save_mean, float *save_var) {
  const float N = (float)param->unit_;
  const float VN = N;
--- a/mindspore/lite/nnacl/fp32/batchnorm_fp32.h
+++ b/mindspore/lite/nnacl/fp32/batchnorm_fp32.h
@@ -23,12 +23,12 @@
 extern "C" {
 #endif

 void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
                   void *output);
 void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param,
                   int task_id, void *output);
 void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
                        const void *variance, BatchNormParameter *param, int task_id, void *output);
                        const void *variance, const BatchNormParameter *param, int task_id, void *output);

 void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param,
 void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param,
                               float *save_mean, float *save_var);
 #ifdef __cplusplus
 }
--- a/mindspore/lite/nnacl/fp32/concat_fp32.c
+++ b/mindspore/lite/nnacl/fp32/concat_fp32.c
@@ -17,8 +17,8 @@
 #include "nnacl/fp32/concat_fp32.h"
 #include <string.h>

 void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
            int task_id, int thread_num) {
 void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size,
            void *output, int task_id, int thread_num) {
  int before_axis_size = 1;
  for (int i = 0; i < axis; ++i) {
    before_axis_size *= inputs_output_shape[0][i];
@@ -32,12 +32,12 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si
  uint8_t *dst_base = (output);
  size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
  for (int i = 0; i < input_num; ++i) {
    uint8_t *src_base = (input[i]);
    const uint8_t *src_base = (input[i]);
    size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
    int offset = UP_DIV(input_stride, thread_num);
    int count = MSMIN(offset, input_stride - offset * task_id);
    for (int j = 0; j < before_axis_size; j++) {
      uint8_t *src = src_base + j * input_stride + task_id * offset;
      const uint8_t *src = src_base + j * input_stride + task_id * offset;
      uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size + task_id * offset;
      memcpy(dst, src, count);
    }
--- a/mindspore/lite/nnacl/fp32/concat_fp32.h
+++ b/mindspore/lite/nnacl/fp32/concat_fp32.h
@@ -22,8 +22,8 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
            int task_id, int thread_num);
 void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size,
            void *output, int task_id, int thread_num);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/constant_of_shape_fp32.c
+++ b/mindspore/lite/nnacl/fp32/constant_of_shape_fp32.c
@@ -16,7 +16,7 @@

 #include "nnacl/fp32/constant_of_shape_fp32.h"

 int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) {
 int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param) {
  int size = param->unit_;
  float data = param->value_;
  int ind_st = MSMIN(tid * size, param->element_sz_);
@@ -27,7 +27,7 @@ int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) {
  return NNACL_OK;
 }

 int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param) {
 int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param) {
  int size = param->unit_;
  float data = param->value_;
  int ind_st = MSMIN(tid * size, param->element_sz_);
--- a/mindspore/lite/nnacl/fp32/constant_of_shape_fp32.h
+++ b/mindspore/lite/nnacl/fp32/constant_of_shape_fp32.h
@@ -33,8 +33,8 @@ typedef struct ConstantOfShapeParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param);
 int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param);
 int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param);
 int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/conv_fp32.c
+++ b/mindspore/lite/nnacl/fp32/conv_fp32.c
@@ -22,7 +22,7 @@

 // fp32 conv common
 void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
              float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) {
              float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) {
  int out_channel = conv_param->output_channel_;
  int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
  int output_count = conv_param->output_h_ * conv_param->output_w_;
@@ -61,8 +61,8 @@ void ConvFp32(const float *input_data, float *packed_input, const float *packed_

 // fp32 conv winograd
 void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data,
                      TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func,
                      OutputTransFunc out_func) {
                      TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param,
                      InputTransFunc in_func, OutputTransFunc out_func) {
  int in_channel = conv_param->input_channel_;
  int out_w_block = UP_DIV(conv_param->output_w_, conv_param->output_unit_);
  int out_h_block = UP_DIV(conv_param->output_h_, conv_param->output_unit_);
--- a/mindspore/lite/nnacl/fp32/conv_fp32.h
+++ b/mindspore/lite/nnacl/fp32/conv_fp32.h
@@ -35,12 +35,12 @@ extern "C" {

 // fp32 convolution common (im2col+gemm)
 void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
              float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param);
              float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param);

 // fp32 convolution winograd
 void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data,
                      TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func,
                      OutputTransFunc out_func);
                      TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param,
                      InputTransFunc in_func, OutputTransFunc out_func);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/crop_fp32.c
+++ b/mindspore/lite/nnacl/fp32/crop_fp32.c
@@ -18,7 +18,7 @@
 #include "nnacl/op_base.h"
 #include "nnacl/crop_parameter.h"

 void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
 void Pad4DOffset(const CropParameter *crop_param, int64_t *offset) {
  int axis = crop_param->axis_;
  for (int i = DIMENSION_4D - 1; i >= 0; --i) {
    int offset_index = i - axis;
@@ -30,8 +30,8 @@ void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
  }
 }

 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param,
            int thread_id) {
 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape,
            const CropParameter *crop_param, int thread_id) {
  int64_t offset_pad[DIMENSION_4D];
  Pad4DOffset(crop_param, offset_pad);
  int out_shape1 = out_shape[1];
@@ -66,7 +66,7 @@ void Crop4D(const float *input, float *output, const int *in_shape, const int *o
 }

 void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
                      CropParameter *crop_param) {
                      const CropParameter *crop_param) {
  int64_t offset_pad[DIMENSION_4D];
  Pad4DOffset(crop_param, offset_pad);
  size_t in_dim2_stride = in_shape[3];
--- a/mindspore/lite/nnacl/fp32/crop_fp32.h
+++ b/mindspore/lite/nnacl/fp32/crop_fp32.h
@@ -23,10 +23,10 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param,
            int thread_id);
 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape,
            const CropParameter *crop_param, int thread_id);
 void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
                      CropParameter *crop_param);
                      const CropParameter *crop_param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/deconv_fp32.c
+++ b/mindspore/lite/nnacl/fp32/deconv_fp32.c
@@ -34,7 +34,7 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in
 }

 void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *dst, int output_channel,
                      ConvParameter *conv_param) {
                      const ConvParameter *conv_param) {
  /* arm64 row12x8-major(ih*iw x oc*kh*kw)  ->  row8-major(oh*ow x oc) */
  /* arm32 row4x8-major(ih*iw x oc*kh*kw)   ->  row8-major(oh*ow x oc) */
  size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
--- a/mindspore/lite/nnacl/fp32/deconv_fp32.h
+++ b/mindspore/lite/nnacl/fp32/deconv_fp32.h
@@ -30,7 +30,7 @@ extern "C" {
 #endif
 void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, int plane);
 void DeConvPostFp32C8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel,
                      ConvParameter *conv_param);
                      const ConvParameter *conv_param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/deconv_winograd_fp32.c
+++ b/mindspore/lite/nnacl/fp32/deconv_winograd_fp32.c
@@ -16,8 +16,8 @@

 #include "nnacl/fp32/deconv_winograd_fp32.h"

 int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
                         DeConvParam *deconv_param) {
 int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param,
                         const DeConvParam *deconv_param) {
  int tmp_kernel_plane = unit->w_size_ * unit->h_size_;
  int size = conv_param->input_channel_ * conv_param->output_channel_ * tmp_kernel_plane;
  float *current_unit_weight = (float *)malloc(size * sizeof(float));
@@ -25,13 +25,14 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame
    return NNACL_NULL_PTR;
  }
  for (int ic = 0; ic < conv_param->input_channel_; ic++) {
    float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic;
    const float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic;
    float *dst_ic = current_unit_weight + tmp_kernel_plane * conv_param->output_channel_ * ic;
    for (int uhi = 0; uhi < unit->h_size_; uhi++) {
      for (int uwi = 0; uwi < unit->w_size_; uwi++) {
        int src_h_offset = unit->h_start_ + uhi * conv_param->stride_h_;
        int src_w_offset = unit->w_start_ + uwi * conv_param->stride_w_;
        float *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_;
        const float *src_hw =
          src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_;
        float *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * conv_param->output_channel_;
        memcpy(dst_hw, src_hw, conv_param->output_channel_ * sizeof(float));
      }
@@ -132,10 +133,10 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame
  return NNACL_OK;
 }

 void DeConvWgInputPack(float *src_ptr, float *dst_ptr, int channel, int stride) {
 void DeConvWgInputPack(const float *src_ptr, float *dst_ptr, int channel, int stride) {
  int ic4div = channel / C4NUM;
  int ic4mod = channel % C4NUM;
  float *src = src_ptr;
  const float *src = src_ptr;
  float *dst = dst_ptr;

  for (int ic = 0; ic < ic4div; ic++) {
@@ -340,9 +341,10 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
  return;
 }

 void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, const float *at_buf,
                       float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf,
                       int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) {
 void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, const float *weight_buf, float *tmp_buf,
                       const float *at_buf, float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf,
                       float *b_tmp_buf, int unit_size, int w_start, int h_start, const ConvParameter *conv_param,
                       const DeConvParam *deconv_param) {
  int winograd_plane = unit_size * unit_size;
  if (!transfered[unit_size]) {
    WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT,
@@ -355,7 +357,7 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf,
  for (int index = 0; index < winograd_plane; index++) {
    float *src = trans_a_buf + index * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
    float *dst = tmp_buf + index * deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
    float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_;
    const float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_;
    TiledC4MatmulFp32(dst, src, weight, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM, deconv_param->ic_div4_,
                      deconv_param->oc_div4_);
  }
@@ -380,15 +382,16 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf,
  return;
 }

 void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start,
                         int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
 void DeConvWgCalCommFp32(const float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start,
                         int w_start, int h_size, int w_size, const ConvParameter *conv_param,
                         const DeConvParam *deconv_param) {
  int count = deconv_param->oc_div4_ * w_size * h_size;
  int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
  int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_;

  for (int hi = 0; hi < DECONV_WINOGRAD_DEFAULT_UNIT; hi++) {
    for (int wi = 0; wi < DECONV_WINOGRAD_DEFAULT_UNIT; wi++) {
      float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride;
      const float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride;
      TiledC4MatmulFp32(tmp_buf, src_in, weight, DECONV_WINOGRAD_DEFAULT_TILE * 4, deconv_param->ic_div4_, count);

      for (int uhi = 0; uhi < h_size; uhi++) {
@@ -406,8 +409,8 @@ void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, f
  return;
 }

 void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
              ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
 void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
              const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
  /* pack tile input */
  int tile_in_unit_stride = deconv_param->ic_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
 #ifdef ENABLE_ARM
@@ -439,7 +442,7 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind
          continue;
        }

        float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_;
        const float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_;
        DeConvWgInputPack(src, dst, conv_param->input_channel_, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM);
      }
    }
@@ -474,8 +477,8 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind
  return;
 }

 void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param,
                  int calculate_count, int tile_index) {
 void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param,
                  const DeConvParam *deconv_param, int calculate_count, int tile_index) {
  /* merge */
  int src_unit_stride = deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;

@@ -483,7 +486,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par
  int dst_stride = conv_param->output_w_ * conv_param->output_h_ * C4NUM;

  for (int index = 0; index < calculate_count; ++index) {
    float *src_start = tile_out + index * C4NUM;
    const float *src_start = tile_out + index * C4NUM;

    int plane_index = tile_index * DECONV_WINOGRAD_DEFAULT_TILE + index;
    int w_unit_index = plane_index % deconv_param->in_tile_w_count_;
@@ -499,7 +502,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par

    for (int hi = merge_h_start; hi < merge_h_end; hi++) {
      for (int wi = merge_w_start; wi < merge_w_end; wi++) {
        float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride;
        const float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride;
        float *dst = dst_start + (hi * conv_param->output_w_ + wi) * C4NUM;
        DeConvWgMerge(src, dst, src_stride, dst_stride, deconv_param->oc_div4_);
      }
--- a/mindspore/lite/nnacl/fp32/deconv_winograd_fp32.h
+++ b/mindspore/lite/nnacl/fp32/deconv_winograd_fp32.h
@@ -28,12 +28,12 @@
 extern "C" {
 #endif

 int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
                         DeConvParam *deconv_param);
 void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
              ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
 void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param,
                  int calculate_count, int tile_index);
 int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param,
                         const DeConvParam *deconv_param);
 void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
              const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
 void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param,
                  const DeConvParam *deconv_param, int calculate_count, int tile_index);
 void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t ic4, size_t cal_num, size_t oc4);

 #ifdef __cplusplus
--- a/mindspore/lite/nnacl/fp32/detection_post_process_fp32.c
+++ b/mindspore/lite/nnacl/fp32/detection_post_process_fp32.c
@@ -36,8 +36,8 @@ float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) {
  return inter / (area_a + area_b - inter);
 }

 int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors,
                DetectionPostProcessParameter *param) {
 int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors,
                const DetectionPostProcessParameter *param) {
  if (input_boxes == NULL || anchors == NULL || param == NULL) {
    return NNACL_NULL_PTR;
  }
--- a/mindspore/lite/nnacl/fp32/detection_post_process_fp32.h
+++ b/mindspore/lite/nnacl/fp32/detection_post_process_fp32.h
@@ -37,8 +37,8 @@ typedef struct {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors,
                DetectionPostProcessParameter *param);
 int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors,
                const DetectionPostProcessParameter *param);

 int NmsMultiClassesFastCore(const int num_boxes, const int num_classes_with_bg, const float *input_scores,
                            void (*)(const float *, int *, int, int), const DetectionPostProcessParameter *param,
--- a/mindspore/lite/nnacl/fp32/elu_fp32.c
+++ b/mindspore/lite/nnacl/fp32/elu_fp32.c
@@ -18,11 +18,11 @@
 #include <math.h>
 #include "nnacl/errorcode.h"

 void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter *parameter) {
 void Calculate_Data(const float *input_data, float *output_data, int num, const EluParameter *parameter) {
  output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num];
 }

 int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) {
 int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id) {
  for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) {
    Calculate_Data(input_data, output_data, i, parameter);
  }
--- a/mindspore/lite/nnacl/fp32/elu_fp32.h
+++ b/mindspore/lite/nnacl/fp32/elu_fp32.h
@@ -28,7 +28,7 @@ typedef struct EluParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id);
 int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c
+++ b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.c
@@ -31,7 +31,8 @@ void l2_regulate(float *data, int size, float max_norm) {
  return;
 }

 int CopyData(float *input_data, int *ids, float *output_data, int num, EmbeddingLookupParameter *parameter) {
 int CopyData(float *input_data, const int *ids, float *output_data, int num,
             const EmbeddingLookupParameter *parameter) {
  if (ids[num] >= parameter->layer_num_ || ids[num] < 0) {
    return NNACL_ERRCODE_INDEX_OUT_OF_RANGE;
  }
@@ -46,7 +47,8 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding
  return NNACL_OK;
 }

 int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) {
 int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter,
                    int task_id) {
  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
    int ret = CopyData(input_data, ids, output_data, i, parameter);
    if (ret != NNACL_OK) {
--- a/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h
+++ b/mindspore/lite/nnacl/fp32/embedding_lookup_fp32.h
@@ -31,7 +31,8 @@ typedef struct EmbeddingLookupParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id);
 int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter,
                    int task_id);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/exp_fp32.c
+++ b/mindspore/lite/nnacl/fp32/exp_fp32.c
@@ -19,7 +19,7 @@
 #include <string.h>
 #include "nnacl/errorcode.h"

 int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id) {
 int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) {
  if (parameter->scale_ == 1) {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = expf(input_data[i]);
--- a/mindspore/lite/nnacl/fp32/exp_fp32.h
+++ b/mindspore/lite/nnacl/fp32/exp_fp32.h
@@ -33,7 +33,7 @@ typedef struct ExpParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id);
 int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id);
 void ExpFp32(const float *src, float *dst, int num);
 #ifdef __cplusplus
 }
--- a/mindspore/lite/nnacl/fp32/gather_fp32.c
+++ b/mindspore/lite/nnacl/fp32/gather_fp32.c
@@ -26,10 +26,10 @@ inline int Stride(const int *shape, int rank, int index) {
  return stride;
 }

 int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
 int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
           float *output) {
  for (int m = 0; m < outer_size; ++m) {
    float *inputm = input + inner_size * m * limit;
    const float *inputm = input + inner_size * m * limit;
    float *outputm = output + inner_size * m * indices_element_size;
    for (int i = 0; i < indices_element_size; ++i) {
      if (indices[i] < 0 || indices[i] > limit) {
--- a/mindspore/lite/nnacl/fp32/gather_fp32.h
+++ b/mindspore/lite/nnacl/fp32/gather_fp32.h
@@ -22,7 +22,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
 int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
           float *output);
 int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices,
                int indices_element_size, int32_t *output);
--- a/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
@@ -18,9 +18,8 @@
 #include "nnacl/errorcode.h"
 #include "nnacl/op_base.h"

 int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
                 const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
                 const int thread_num) {
 int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data,
                 const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num) {
  if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) {
    return NNACL_NULL_PTR;
  }
--- a/mindspore/lite/nnacl/fp32/instance_norm_fp32.h
+++ b/mindspore/lite/nnacl/fp32/instance_norm_fp32.h
@@ -23,9 +23,8 @@
 extern "C" {
 #endif

 int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
                 const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
                 const int thread_num);
 int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data,
                 const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/layer_norm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/layer_norm_fp32.c
@@ -18,9 +18,8 @@
 #include "nnacl/errorcode.h"
 #include "nnacl/op_base.h"

 int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data,
              const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid,
              const int thread_num) {
 int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data,
              bool affine, float epsilon, float *dst_data, int tid, int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
--- a/mindspore/lite/nnacl/fp32/layer_norm_fp32.h
+++ b/mindspore/lite/nnacl/fp32/layer_norm_fp32.h
@@ -23,9 +23,8 @@
 extern "C" {
 #endif

 int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data,
              const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid,
              const int thread_num);
 int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data,
              bool affine, float epsilon, float *dst_data, int tid, int thread_num);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/local_response_norm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/local_response_norm_fp32.c
@@ -17,15 +17,15 @@
 #include "nnacl/fp32/local_response_norm_fp32.h"
 #include <math.h>

 int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr,
                      LocalResponseNormParameter *param) {
 int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr,
                      const LocalResponseNormParameter *param) {
  int depth_radius = param->depth_radius_;
  float bias = param->bias_;
  float alpha = param->alpha_;
  float beta = param->beta_;

  for (int i = 0; i < out_size; i++) {
    float *in_data = input_ptr + i * channel;
    const float *in_data = input_ptr + i * channel;
    float *out_data = output_ptr + i * channel;

    for (int j = 0; j < channel; j++) {
--- a/mindspore/lite/nnacl/fp32/local_response_norm_fp32.h
+++ b/mindspore/lite/nnacl/fp32/local_response_norm_fp32.h
@@ -30,8 +30,8 @@ typedef struct LocalResponseNormParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr,
                      LocalResponseNormParameter *param);
 int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr,
                      const LocalResponseNormParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/lstm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/lstm_fp32.c
@@ -19,7 +19,7 @@
 #include "nnacl/fp32/activation_fp32.h"
 #include "nnacl/fp32/arithmetic_fp32.h"

 void InitGate(float *gate_buffer, const float *bias, LstmParameter *lstm_parm) {
 void InitGate(float *gate_buffer, const float *bias, const LstmParameter *lstm_parm) {
  int gate_offest = 0;
  for (int l = 0; l < 4; l++) {
    int batch_offest = gate_offest;
@@ -94,7 +94,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we
                  const float *input_cell_weight, const float *input_output_weight, const float *state_input_weight,
                  const float *state_forget_weight, const float *state_cell_weight, const float *state_output_weight,
                  const float *bias, float *hidden_state, float *cell_state, float *gate_buffer,
                  LstmParameter *lstm_parm) {
                  const LstmParameter *lstm_parm) {
  InitGate(gate_buffer, bias, lstm_parm);

  float *input_gate = gate_buffer;
@@ -139,7 +139,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we
 }

 void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias,
          float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm) {
          float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm) {
  // forward
  const float *input_input_weight = weight_i;
  const float *input_forget_weight = weight_i + lstm_parm->input_size_ * lstm_parm->hidden_size_ * 2;
--- a/mindspore/lite/nnacl/fp32/lstm_fp32.h
+++ b/mindspore/lite/nnacl/fp32/lstm_fp32.h
@@ -34,7 +34,7 @@ typedef struct LstmParameter {
 extern "C" {
 #endif
 void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias,
          float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm);
          float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/pad_fp32.c
+++ b/mindspore/lite/nnacl/fp32/pad_fp32.c
@@ -18,7 +18,7 @@
 #include "nnacl/common_func.h"

 void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
         const int *paddings, const int tid, const int thread_num) {
         const int *paddings, int tid, int thread_num) {
  int in[4], out[4];
  for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
    out[0] = in[0] + paddings[0];
--- a/mindspore/lite/nnacl/fp32/pad_fp32.h
+++ b/mindspore/lite/nnacl/fp32/pad_fp32.h
@@ -28,7 +28,7 @@
 extern "C" {
 #endif
 void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
         const int *paddings, const int tid, const int thread_num);
         const int *paddings, int tid, int thread_num);
 void MirrorPad(const float *input_data, float *output_data, const int *input_shape, const PadParameter *pad_param,
               int begin, int end);

--- a/mindspore/lite/nnacl/fp32/pooling_fp32.c
+++ b/mindspore/lite/nnacl/fp32/pooling_fp32.c
@@ -18,8 +18,8 @@
 #include <float.h>
 #include "nnacl/errorcode.h"

 int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
               float maxf) {
 int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
               float minf, float maxf) {
  int win_w = pooling_param->window_w_;
  int win_h = pooling_param->window_h_;
  int channel = pooling_param->input_channel_;
@@ -144,8 +144,8 @@ int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pool
  return NNACL_OK;
 }

 void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
                float maxf) {
 void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
                float minf, float maxf) {
  int win_w = pooling_param->window_w_;
  int win_h = pooling_param->window_h_;
  int channel = pooling_param->input_channel_;
--- a/mindspore/lite/nnacl/fp32/pooling_fp32.h
+++ b/mindspore/lite/nnacl/fp32/pooling_fp32.h
@@ -27,10 +27,10 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
               float maxf);
 void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
                float maxf);
 int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
               float minf, float maxf);
 void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
                float minf, float maxf);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/prelu_fp32.c
+++ b/mindspore/lite/nnacl/fp32/prelu_fp32.c
@@ -18,7 +18,7 @@
 #include <arm_neon.h>
 #endif

 void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id) {
 void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id) {
  float *negetive_slope_value = prelu_param_->slope_;
  int c4 = prelu_param_->channel_num_ / C4NUM;
  int channel_num = prelu_param_->channel_num_;
@@ -81,7 +81,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
        int c4_offset = tile_offset + k * C4NUM;
        int slope_offset = k * C4NUM;
        for (int l = 0; l < C4NUM; ++l) {
          float in_data = input_ptr[c4_offset + l];
          const float in_data = input_ptr[c4_offset + l];
          output_ptr[c4_offset + l] =
            (in_data < 0 ? in_data : 0) * negetive_slope_value[slope_offset + l] + (in_data > 0 ? in_data : 0);
        }
@@ -93,7 +93,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
      int offset = m * channel_num;
      for (int k = c_s; k < channel_num; ++k) {
        int c4_offset = offset + k;
        float in_data = input_ptr[c4_offset];
        const float in_data = input_ptr[c4_offset];
        if (in_data >= 0) {
          output_ptr[c4_offset] = in_data;
        } else {
@@ -104,7 +104,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
  }
 }

 void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id) {
 void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id) {
  for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) {
    int cal_index;
 #ifdef ENABLE_NEON
--- a/mindspore/lite/nnacl/fp32/prelu_fp32.h
+++ b/mindspore/lite/nnacl/fp32/prelu_fp32.h
@@ -22,9 +22,9 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id);
 void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id);

 void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id);
 void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/reduce_fp32.c
+++ b/mindspore/lite/nnacl/fp32/reduce_fp32.c
@@ -23,8 +23,8 @@
 #include "nnacl/reduce_parameter.h"
 #endif

 int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
               const int tid, const int thread_num) {
 int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -44,8 +44,8 @@ int ReduceMean(const int outer_size, const int inner_size, const int axis_size,
  }
  return NNACL_OK;
 }
 int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
              const int tid, const int thread_num) {
 int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -81,8 +81,8 @@ int ReduceSum(const int outer_size, const int inner_size, const int axis_size, c
  }
  return NNACL_OK;
 }
 int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
              const int tid, const int thread_num) {
 int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -102,8 +102,8 @@ int ReduceMax(const int outer_size, const int inner_size, const int axis_size, c
  }
  return NNACL_OK;
 }
 int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
              const int tid, const int thread_num) {
 int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -123,8 +123,8 @@ int ReduceMin(const int outer_size, const int inner_size, const int axis_size, c
  }
  return NNACL_OK;
 }
 int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
                 const int tid, const int thread_num) {
 int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                 int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -144,8 +144,8 @@ int IntReduceMin(const int outer_size, const int inner_size, const int axis_size
  }
  return NNACL_OK;
 }
 int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
               const int tid, const int thread_num) {
 int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -166,8 +166,8 @@ int ReduceProd(const int outer_size, const int inner_size, const int axis_size,
  return NNACL_OK;
 }

 int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
                  const int tid, const int thread_num) {
 int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                  int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
@@ -190,8 +190,8 @@ int IntReduceProd(const int outer_size, const int inner_size, const int axis_siz
  }
  return NNACL_OK;
 }
 int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
                    float *dst_data, const int tid, const int thread_num) {
 int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
                    int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
--- a/mindspore/lite/nnacl/fp32/reduce_fp32.h
+++ b/mindspore/lite/nnacl/fp32/reduce_fp32.h
@@ -22,22 +22,22 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
               const int tid, const int thread_num);
 int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
              const int tid, const int thread_num);
 int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
              const int tid, const int thread_num);
 int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
              const int tid, const int thread_num);
 int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
                 const int tid, const int thread_num);
 int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
               const int tid, const int thread_num);
 int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
                  const int tid, const int thread_num);
 int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
                    float *dst_data, const int tid, const int thread_num);
 int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num);
 int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num);
 int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num);
 int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num);
 int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                 int thread_num);
 int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num);
 int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                  int thread_num);
 int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
                    int thread_num);

 #ifdef ENABLE_NNACL_INFER_SHAPE
 int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
--- a/mindspore/lite/nnacl/fp32/roi_pooling_fp32.c
+++ b/mindspore/lite/nnacl/fp32/roi_pooling_fp32.c
@@ -20,7 +20,8 @@
 #include "nnacl/errorcode.h"
 #include "nnacl/op_base.h"

 int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param) {
 int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid,
               const ROIPoolingParameter *param) {
  int num_rois = param->output_n_;
  int units = UP_DIV(num_rois, param->thread_num_);
  int roi_st = tid * units;
@@ -52,7 +53,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, in

    float bin_size_h = (float)roi_height / (float)pooled_height;
    float bin_size_w = (float)roi_width / (float)pooled_width;
    float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind;
    const float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind;

    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
--- a/mindspore/lite/nnacl/fp32/roi_pooling_fp32.h
+++ b/mindspore/lite/nnacl/fp32/roi_pooling_fp32.h
@@ -40,7 +40,8 @@ typedef struct ROIPoolingParameter {
 #ifdef __cplusplus
 extern "C" {
 #endif
 int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param);
 int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid,
               const ROIPoolingParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/scale_fp32.c
+++ b/mindspore/lite/nnacl/fp32/scale_fp32.c
@@ -66,7 +66,7 @@ void ScaleAxis(const float *in_data, float *out_data, const float *scale, const
 }

 void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
             ScaleParameter *scale_param) {
             const ScaleParameter *scale_param) {
  int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
  int outer_start = task_id * outer_step;
  int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
@@ -137,7 +137,7 @@ void ScaleAxisRelu(const float *in_data, float *out_data, const float *scale, co
 }

 void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
                 ScaleParameter *scale_param) {
                 const ScaleParameter *scale_param) {
  int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
  int outer_start = task_id * outer_step;
  int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
@@ -210,7 +210,7 @@ void ScaleAxisRelu6(const float *in_data, float *out_data, const float *scale, c
 }

 void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
                  ScaleParameter *scale_param) {
                  const ScaleParameter *scale_param) {
  int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
  int outer_start = task_id * outer_step;
  int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
--- a/mindspore/lite/nnacl/fp32/scale_fp32.h
+++ b/mindspore/lite/nnacl/fp32/scale_fp32.h
@@ -23,11 +23,11 @@
 extern "C" {
 #endif
 void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
             ScaleParameter *scale_param);
             const ScaleParameter *scale_param);
 void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
                 ScaleParameter *scale_param);
                 const ScaleParameter *scale_param);
 void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
                  ScaleParameter *scale_param);
                  const ScaleParameter *scale_param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/slice_fp32.c
+++ b/mindspore/lite/nnacl/fp32/slice_fp32.c
@@ -46,7 +46,7 @@ void PadSliceParameterTo4D(SliceParameter *param) {
  param->param_length_ = DIMENSION_4D;
 }

 void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id) {
 void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id) {
  int32_t out_dim1 = param->size_[1];
  int32_t out_dim2 = param->size_[2];
  int32_t out_dim3 = param->size_[3];
@@ -78,7 +78,7 @@ void DoSlice(const float *input, float *output, SliceParameter *param, int threa
  }
 }

 void DoSliceNoParallel(const float *input, float *output, SliceParameter *param) {
 void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param) {
  size_t copy_size = param->size_[3] * sizeof(float);
  size_t in_stride2 = param->shape_[3];
  size_t in_stride1 = param->shape_[2] * in_stride2;
--- a/mindspore/lite/nnacl/fp32/slice_fp32.h
+++ b/mindspore/lite/nnacl/fp32/slice_fp32.h
@@ -23,8 +23,8 @@
 extern "C" {
 #endif
 void PadSliceParameterTo4D(SliceParameter *param);
 void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id);
 void DoSliceNoParallel(const float *input, float *output, SliceParameter *param);
 void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id);
 void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/softmax_fp32.c
+++ b/mindspore/lite/nnacl/fp32/softmax_fp32.c
@@ -88,10 +88,10 @@ void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel) {
 }

 // output = exp(input) / reduce_sum(exp(input), axis)
 void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter) {
 void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter) {
  int axis = parameter->axis_;
  int n_dim = parameter->n_dim_;
  int *input_shape = parameter->input_shape_;
  const int *input_shape = parameter->input_shape_;
  int inner_size = 1;
  int outter_size = 1;

--- a/mindspore/lite/nnacl/fp32/softmax_fp32.h
+++ b/mindspore/lite/nnacl/fp32/softmax_fp32.h
@@ -22,7 +22,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter);
 void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter);
 void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel);
 #ifdef __cplusplus
 }
--- a/mindspore/lite/nnacl/pack.c
+++ b/mindspore/lite/nnacl/pack.c
@@ -214,7 +214,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, int32_t *filt
  return;
 }

 void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num,
 void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num,
                        int block_index) {
  // input format : nhwc
  int kernel_h = conv_param->kernel_h_;
--- a/mindspore/lite/nnacl/pack.h
+++ b/mindspore/lite/nnacl/pack.h
@@ -27,7 +27,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
 void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num,
 void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num,
                        int block_index);

 void PackHWCToWHC(const float *src, float *dst, int height, int width, int channel);
--- a/mindspore/lite/nnacl/winograd_transform.c
+++ b/mindspore/lite/nnacl/winograd_transform.c
@@ -18,7 +18,8 @@

 // fp32 conv winograd
 void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num,
                            int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func) {
                            int out_tile_index, int out_w_block_num, const ConvParameter *conv_param,
                            InputTransFunc func) {
  int input_unit = conv_param->input_unit_;
  int output_unit = conv_param->output_unit_;
  int in_channel = conv_param->input_channel_;
@@ -96,7 +97,8 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
 }

 void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num,
                             int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func) {
                             int out_tile_index, int output_unit_num, const ConvParameter *conv_param,
                             OutputTransFunc func) {
  int output_unit = conv_param->output_unit_;
  int output_w = conv_param->output_w_;
  int output_h = conv_param->output_h_;
--- a/mindspore/lite/nnacl/winograd_transform.h
+++ b/mindspore/lite/nnacl/winograd_transform.h
@@ -33,10 +33,12 @@ extern "C" {
 #endif
 // for fp32 winograd input/output transform
 void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num,
                            int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func);
                            int out_tile_index, int out_w_block_num, const ConvParameter *conv_param,
                            InputTransFunc func);

 void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num,
                             int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func);
                             int out_tile_index, int output_unit_num, const ConvParameter *conv_param,
                             OutputTransFunc func);

 // for int8 convolution 3x3 filter/input/output transform
 void Conv3x3Int8InputUnit(int16_t *tmp_data, int16_t *trans_input_data, size_t step, int input_zp);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
@@ -46,8 +46,8 @@ int ConcatCPUKernel::ReSize() { return ConcatBaseCPUKernel::ReSize(); }

 int ConcatCPUKernel::DoConcat(int task_id) {
  auto input_num = in_tensors_.size();
  std::vector<void *> inputs_addr(input_num, nullptr);
  std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
  std::vector<const void *> inputs_addr(input_num, nullptr);
  std::vector<const int *> inputs_output_shape(input_num + 1, nullptr);

  std::vector<std::vector<int>> shapes;
  for (size_t i = 0; i < input_num; ++i) {
@@ -59,8 +59,8 @@ int ConcatCPUKernel::DoConcat(int task_id) {
  inputs_output_shape[input_num] = output_shape.data();
  auto output_addr = out_tensors_.at(0)->MutableData();

  Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(),
         output_shape.size(), output_addr, task_id, thread_count_);
  Concat(inputs_addr.data(), input_num, axis_, inputs_output_shape.data(), output_shape.size(), output_addr, task_id,
         thread_count_);
  return RET_OK;
 }