From: @zhujingxuan Reviewed-by: @wangchengyuan,@zhanghaibo5 Signed-off-by:tags/v1.2.0-rc1
| @@ -7,6 +7,7 @@ include(${TOP_DIR}/cmake/utils.cmake) | |||||
| include(${TOP_DIR}/cmake/dependency_utils.cmake) | include(${TOP_DIR}/cmake/dependency_utils.cmake) | ||||
| include(${TOP_DIR}/cmake/dependency_securec.cmake) | include(${TOP_DIR}/cmake/dependency_securec.cmake) | ||||
| include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake) | include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake) | ||||
| include(${TOP_DIR}/cmake/external_libs/cmsis.cmake) | |||||
| set(FBS_FILES | set(FBS_FILES | ||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../schema/model.fbs | ${CMAKE_CURRENT_SOURCE_DIR}/../schema/model.fbs | ||||
| @@ -33,6 +33,10 @@ set(LITE_SRC | |||||
| ${LITE_DIR}/src/tensorlist.cc | ${LITE_DIR}/src/tensorlist.cc | ||||
| ${LITE_DIR}/src/tensor.cc | ${LITE_DIR}/src/tensor.cc | ||||
| ${LITE_DIR}/src/common/log_adapter.cc | ${LITE_DIR}/src/common/log_adapter.cc | ||||
| ${NNACL_DIR}/int8/quantize.c | |||||
| ${NNACL_DIR}/int8/pack_int8.c | |||||
| ${NNACL_DIR}/int8/matmul_int8.c | |||||
| ${NNACL_DIR}/int8/fixed_point.c | |||||
| ) | ) | ||||
| file(GLOB_RECURSE MICRO_ALLOCATOR RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE MICRO_ALLOCATOR RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| @@ -43,6 +47,14 @@ file(GLOB_RECURSE MICRO_GENERATOR RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| generator/*.cc | generator/*.cc | ||||
| ) | ) | ||||
| file(GLOB_RECURSE MICRO_OPCODERS_BASE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/opcoders/base/*.cc | |||||
| ) | |||||
| file(GLOB_RECURSE MICRO_OPCODERS_CMSIS_NN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/opcoders/cmsis-nn/*.cc | |||||
| ) | |||||
| file(GLOB_RECURSE MICRO_UTILS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE MICRO_UTILS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| utils/*.cc | utils/*.cc | ||||
| ) | ) | ||||
| @@ -62,6 +74,8 @@ set(MICRO_CODER_SRC | |||||
| list(APPEND MICRO_CODER_SRC | list(APPEND MICRO_CODER_SRC | ||||
| ${MICRO_ALLOCATOR} | ${MICRO_ALLOCATOR} | ||||
| ${MICRO_GENERATOR} | ${MICRO_GENERATOR} | ||||
| ${MICRO_OPCODERS_BASE} | |||||
| ${MICRO_OPCODERS_CMSIS_NN} | |||||
| ) | ) | ||||
| add_executable(codegen main.cc | add_executable(codegen main.cc | ||||
| @@ -0,0 +1,262 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/base/conv2d_base_coder.h" | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "nnacl/winograd_utils.h" | |||||
| #include "nnacl/int8/quantize.h" | |||||
| #include "micro/coder/log.h" | |||||
| namespace { | |||||
| int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) { | |||||
| MS_CHECK_TRUE(input_arg_num > 0, "invalid value of input_arg_num"); | |||||
| MS_CHECK_TRUE(filter_arg_num > 0, "invalid value of filter_arg_num"); | |||||
| MS_CHECK_TRUE(output_arg_num > 0, "invalid value of output_arg_num"); | |||||
| quant_arg->input_quant_args_ = static_cast<QuantArg *>(malloc(input_arg_num * sizeof(struct QuantArg))); | |||||
| MS_CHECK_PTR(quant_arg->input_quant_args_); | |||||
| quant_arg->filter_quant_args_ = static_cast<QuantArg *>(malloc(filter_arg_num * sizeof(QuantArg))); | |||||
| MS_CHECK_PTR(quant_arg->filter_quant_args_); | |||||
| quant_arg->output_quant_args_ = static_cast<QuantArg *>(malloc(output_arg_num * sizeof(QuantArg))); | |||||
| MS_CHECK_PTR(quant_arg->output_quant_args_); | |||||
| return mindspore::lite::RET_OK; | |||||
| } | |||||
| } // namespace | |||||
| namespace mindspore::lite::micro { | |||||
| string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) { | |||||
| string ret; | |||||
| if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) { | |||||
| ret = "PackNHWCToNC4HW4Fp32"; | |||||
| } else if (src_format == schema::Format_NHWC && dst_format == schema::Format_NHWC4) { | |||||
| ret = "PackNHWCToNHWC4Fp32"; | |||||
| } else if (src_format == schema::Format_NC4HW4 && dst_format == schema::Format_NHWC4) { | |||||
| ret = "PackNC4HW4ToNHWC4Fp32"; | |||||
| } else if (src_format == schema::Format_NCHW && dst_format == schema::Format_NC4HW4) { | |||||
| ret = "PackNCHWToNC4HW4Fp32"; | |||||
| } else if (src_format == schema::Format_NC4HW4 && dst_format == schema::Format_NHWC) { | |||||
| ret = "PackNC4HW4ToNHWCFp32"; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Unsupported transform from " << schema::EnumNameFormat(src_format) << " to " | |||||
| << schema::EnumNameFormat(dst_format); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::Format dst_format) { | |||||
| string ret; | |||||
| if (src_format == schema::Format_NHWC && dst_format == schema::Format_NHWC4) { | |||||
| ret = "PackNHWCToNHWC4Int8"; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Unsupported transform from " << schema::EnumNameFormat(src_format) << " to " | |||||
| << schema::EnumNameFormat(dst_format); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| string Conv2DBaseCoder::LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format) { | |||||
| string ret; | |||||
| switch (data_type) { | |||||
| case kNumberTypeInt8: | |||||
| ret = LayoutTransformInt8(src_format, dst_format); | |||||
| break; | |||||
| case kNumberTypeFloat32: | |||||
| ret = LayoutTransformFp32(src_format, dst_format); | |||||
| break; | |||||
| default: | |||||
| MS_LOG(WARNING) << "unsupported data type"; | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| int Conv2DBaseCoder::SetIfPerChannel() { | |||||
| auto input_channel = static_cast<size_t>(filter_tensor_->Channel()); | |||||
| auto output_channel = static_cast<size_t>(filter_tensor_->Batch()); | |||||
| uint8_t per_channel = 0b0; | |||||
| if (conv_quant_arg_->input_arg_num_ != kPerTensor) { | |||||
| MS_CHECK_TRUE(conv_quant_arg_->input_arg_num_ == input_channel, | |||||
| "input per channel quant param length is not equal to input channel."); | |||||
| per_channel = per_channel | INPUT_PER_CHANNEL; | |||||
| } | |||||
| if (conv_quant_arg_->filter_arg_num_ != kPerTensor) { | |||||
| MS_CHECK_TRUE(conv_quant_arg_->filter_arg_num_ == output_channel, | |||||
| "weight per channel quant param length is not equal to filter num."); | |||||
| per_channel = per_channel | FILTER_PER_CHANNEL; | |||||
| } | |||||
| if (conv_quant_arg_->output_arg_num_ != kPerTensor) { | |||||
| MS_CHECK_TRUE(conv_quant_arg_->output_arg_num_ != output_channel, | |||||
| "output per channel quant param length is not equal to output channel."); | |||||
| per_channel = per_channel | OUTPUT_PER_CHANNEL; | |||||
| } | |||||
| conv_quant_arg_->per_channel_ = per_channel; | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::MallocQuantParam() { | |||||
| conv_quant_arg_ = &conv_param_->conv_quant_arg_; | |||||
| size_t input_arg_num = input_tensor_->quant_params().size(); | |||||
| size_t filter_arg_num = filter_tensor_->quant_params().size(); | |||||
| size_t output_arg_num = output_tensor_->quant_params().size(); | |||||
| conv_quant_arg_->input_arg_num_ = input_arg_num; | |||||
| conv_quant_arg_->filter_arg_num_ = filter_arg_num; | |||||
| conv_quant_arg_->output_arg_num_ = output_arg_num; | |||||
| MallocConvQuantParams(conv_quant_arg_, input_arg_num, filter_arg_num, output_arg_num); | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::SetInputTensorQuantParam() { | |||||
| size_t in_arg_num = conv_quant_arg_->input_arg_num_; | |||||
| if (in_arg_num == kPerTensor) { | |||||
| QuantArg input_quant_arg = input_tensor_->quant_params().at(0); | |||||
| conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint; | |||||
| conv_quant_arg_->input_quant_args_[0].scale_ = input_quant_arg.scale; | |||||
| return RET_OK; | |||||
| } else { | |||||
| // per channel | |||||
| MS_LOG(ERROR) << "Not Support Per Channel for input now."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| int Conv2DBaseCoder::SetFilterTensorQuantParam() { | |||||
| size_t weight_arg_num = conv_quant_arg_->filter_arg_num_; | |||||
| if (weight_arg_num == kPerTensor) { | |||||
| QuantArg weight_quant_arg = filter_tensor_->quant_params().at(0); | |||||
| conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint; | |||||
| conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale; | |||||
| } else { | |||||
| std::vector<QuantArg> weight_quant_arg = filter_tensor_->quant_params(); | |||||
| for (int i = 0; i < static_cast<int>(weight_arg_num); ++i) { | |||||
| conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint; | |||||
| conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale; | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::SetOutputTensorQuantParam() { | |||||
| size_t out_arg_num = conv_quant_arg_->output_arg_num_; | |||||
| if (out_arg_num == kPerTensor) { | |||||
| QuantArg output_quant_arg = output_tensor_->quant_params().at(0); | |||||
| conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint; | |||||
| conv_quant_arg_->output_quant_args_[0].scale_ = output_quant_arg.scale; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Not Support Per Channel for input now."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::SetQuantMultiplier() { | |||||
| // now only support weight tensor is per channel, others are per tensor. | |||||
| int weight_arg_num = kPerTensor; | |||||
| if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) { | |||||
| weight_arg_num = conv_quant_arg_->filter_arg_num_; | |||||
| } | |||||
| conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(weight_arg_num * sizeof(double))); | |||||
| MS_CHECK_PTR(conv_quant_arg_->real_multiplier_); | |||||
| conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | |||||
| MS_CHECK_PTR(conv_quant_arg_->left_shift_); | |||||
| conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | |||||
| MS_CHECK_PTR(conv_quant_arg_->right_shift_); | |||||
| conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t))); | |||||
| MS_CHECK_PTR(conv_quant_arg_->quant_multiplier_); | |||||
| conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t))); | |||||
| MS_CHECK_PTR(conv_quant_arg_->out_act_min_); | |||||
| conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t))); | |||||
| MS_CHECK_PTR(conv_quant_arg_->out_act_max_); | |||||
| for (int i = 0; i < weight_arg_num; ++i) { | |||||
| const auto in_scale = | |||||
| static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_); | |||||
| double real_multiplier = in_scale / static_cast<double>(conv_quant_arg_->output_quant_args_[0].scale_); | |||||
| conv_quant_arg_->real_multiplier_[i] = real_multiplier; | |||||
| if (conv_quant_arg_->quant_multiplier_mode_ == Method_SinglePrecision) { | |||||
| QuantizeRoundParameterWithSinglePrecision(real_multiplier, &conv_quant_arg_->quant_multiplier_[i], | |||||
| &conv_quant_arg_->left_shift_[i], &conv_quant_arg_->right_shift_[i]); | |||||
| } else if (conv_quant_arg_->quant_multiplier_mode_ == Method_DoublePrecision) { | |||||
| QuantizeRoundParameterWithDoublePrecision(real_multiplier, &conv_quant_arg_->quant_multiplier_[i], | |||||
| &conv_quant_arg_->left_shift_[i], &conv_quant_arg_->right_shift_[i]); | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::CheckResizeValid() { | |||||
| // ===============check in channel================= // | |||||
| int32_t filter_in_channel = filter_tensor_->Channel(); | |||||
| int32_t resize_in_channel = input_tensor_->Channel(); | |||||
| MS_CHECK_TRUE(filter_in_channel == resize_in_channel, | |||||
| "Channel of resized input should be equal to in channel of filter."); | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::SetQuantParam() { | |||||
| MS_CHECK_RET_CODE(MallocQuantParam(), "Malloc quant param failed."); | |||||
| MS_CHECK_RET_CODE(SetInputTensorQuantParam(), "Set Input Tensor Quant Param Failed."); | |||||
| MS_CHECK_RET_CODE(SetFilterTensorQuantParam(), "Set Filter Tensor Quant Param Failed."); | |||||
| MS_CHECK_RET_CODE(SetOutputTensorQuantParam(), "Set Output Tensor Quant Param Failed."); | |||||
| MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed."); | |||||
| MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed."); | |||||
| // now only consider per tensor for output | |||||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_min_); | |||||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_max_); | |||||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.output_quant_args_); | |||||
| bool relu = conv_param_->act_type_ == ActType_Relu; | |||||
| bool relu6 = conv_param_->act_type_ == ActType_Relu6; | |||||
| CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_, | |||||
| conv_param_->conv_quant_arg_.output_quant_args_[0].scale_, | |||||
| &conv_param_->conv_quant_arg_.out_act_min_[0], | |||||
| &conv_param_->conv_quant_arg_.out_act_max_[0]); | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::Init() { | |||||
| this->conv_param_ = reinterpret_cast<ConvParameter *>(parameter_); | |||||
| filter_tensor_ = input_tensors_.at(kWeightIndex); | |||||
| MS_CHECK_PTR(filter_tensor_); | |||||
| MS_CHECK_PTR(filter_tensor_->data_c()); | |||||
| if (input_tensors_.size() == kInputSize2) { | |||||
| bias_tensor_ = input_tensors_.at(kBiasIndex); | |||||
| MS_CHECK_PTR(bias_tensor_); | |||||
| MS_CHECK_PTR(bias_tensor_->data_c()); | |||||
| } else { | |||||
| MS_CHECK_TRUE(input_tensors_.size() == kInputSize1, "wrong input size"); | |||||
| } | |||||
| conv_param_->input_batch_ = input_tensor_->Batch(); | |||||
| conv_param_->input_h_ = input_tensor_->Height(); | |||||
| conv_param_->input_w_ = input_tensor_->Width(); | |||||
| conv_param_->input_channel_ = input_tensor_->Channel(); | |||||
| conv_param_->output_batch_ = output_tensor_->Batch(); | |||||
| conv_param_->output_h_ = output_tensor_->Height(); | |||||
| conv_param_->output_w_ = output_tensor_->Width(); | |||||
| conv_param_->output_channel_ = output_tensor_->Channel(); | |||||
| return RET_OK; | |||||
| } | |||||
| int Conv2DBaseCoder::CheckLayout(lite::Tensor *input_tensor) { | |||||
| mindspore::TypeId data_type = input_tensor->data_type(); | |||||
| schema::Format input_format = input_tensor->format(); | |||||
| schema::Format execute_format = schema::Format_NHWC4; | |||||
| convert_func_ = LayoutTransform(data_type, input_format, execute_format); | |||||
| MS_CHECK_TRUE(!convert_func_.empty(), "layout convert func is nullptr."); | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::lite::micro | |||||
| @@ -0,0 +1,90 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_CONV2D_BASE_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_CONV2D_BASE_CODER_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <utility> | |||||
| #include <memory> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "src/runtime/kernel/arm/base/layout_transform.h" | |||||
| #include "nnacl/conv_parameter.h" | |||||
| namespace mindspore::lite::micro { | |||||
| using std::string; | |||||
| class Conv2DBaseCoder : public OperatorCoder { | |||||
| public: | |||||
| Conv2DBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) | |||||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||||
| ~Conv2DBaseCoder() override { | |||||
| if (conv_quant_arg_ == nullptr) { | |||||
| return; | |||||
| } | |||||
| free(conv_quant_arg_->real_multiplier_); | |||||
| free(conv_quant_arg_->left_shift_); | |||||
| free(conv_quant_arg_->right_shift_); | |||||
| free(conv_quant_arg_->quant_multiplier_); | |||||
| free(conv_quant_arg_->out_act_min_); | |||||
| free(conv_quant_arg_->out_act_max_); | |||||
| free(conv_quant_arg_->input_quant_args_); | |||||
| free(conv_quant_arg_->filter_quant_args_); | |||||
| free(conv_quant_arg_->output_quant_args_); | |||||
| } | |||||
| protected: | |||||
| int Init(); | |||||
| int SetQuantParam(); | |||||
| int MallocQuantParam(); | |||||
| int SetInputTensorQuantParam(); | |||||
| int SetFilterTensorQuantParam(); | |||||
| int SetOutputTensorQuantParam(); | |||||
| int SetQuantMultiplier(); | |||||
| int CheckResizeValid(); | |||||
| int SetIfPerChannel(); | |||||
| int CheckLayout(lite::Tensor *input_tensor); | |||||
| string LayoutTransformFp32(schema::Format src_format, schema::Format dst_format); | |||||
| string LayoutTransformInt8(schema::Format src_format, schema::Format dst_format); | |||||
| string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format); | |||||
| ConvParameter *conv_param_{nullptr}; | |||||
| ConvQuantArg *conv_quant_arg_{nullptr}; | |||||
| Tensor *filter_tensor_{nullptr}; | |||||
| Tensor *bias_tensor_{nullptr}; | |||||
| string convert_func_; | |||||
| }; | |||||
| } // namespace mindspore::lite::micro | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_CONV2D_BASE_CODER_H_ | |||||
| @@ -0,0 +1,95 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <string> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| #include "micro/coder/opcoders/base/dtype_cast_coder.h" | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| using mindspore::schema::PrimitiveType_Cast; | |||||
| namespace mindspore::lite::micro { | |||||
| int DTypeCastCoder::Prepare(CoderContext *const context) { | |||||
| data_num_ = input_tensor_->ElementsNum(); | |||||
| if (data_num_ == 0) { | |||||
| return RET_OK; | |||||
| } | |||||
| int thread_num = MSMIN(thread_num_, static_cast<int>(data_num_)); | |||||
| MS_CHECK_TRUE(thread_num > 0, "thread_num <= 0"); | |||||
| stride_ = UP_DIV(data_num_, thread_num); | |||||
| return RET_OK; | |||||
| } | |||||
| int DTypeCastCoder::DoCode(CoderContext *const context) { | |||||
| int task_id = 0; | |||||
| int data_num = MSMIN(stride_, data_num_ - task_id * stride_); | |||||
| if (data_num <= 0) { | |||||
| return RET_OK; | |||||
| } | |||||
| TypeId input_data_type = input_tensor_->data_type(); | |||||
| TypeId output_data_type = output_tensor_->data_type(); | |||||
| std::vector<std::string> asmFiles; | |||||
| if (target_ == kARM32A) { | |||||
| asmFiles = {"nnacl/assembly/arm32/PostFuncBiasReluC8.S", "nnacl/assembly/arm32/PostFuncBiasReluC4.S"}; | |||||
| } else if (target_ == kARM64) { | |||||
| asmFiles = {"nnacl/assembly/arm64/PostFuncBiasReluC8.S", "nnacl/assembly/arm64/PostFuncBiasReluC4.S"}; | |||||
| } | |||||
| Collect(context, {"nnacl/fp32/cast.h"}, {"nnacl/fp32/cast.c", "nnacl/fp32/common_func.c"}, asmFiles); | |||||
| Serializer code; | |||||
| if (output_data_type != kNumberTypeFloat32) { | |||||
| if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) { | |||||
| std::string input_str = allocator_->GetRuntimeAddr(input_tensor_); | |||||
| std::string output_str = allocator_->GetRuntimeAddr(output_tensor_); | |||||
| code << "\t\tfor (int i = 0; i < " << data_num << "; ++i) {\n"; | |||||
| code << "\t\t\t(" << output_str << ")[i] = (" << input_str << ")[i];\n"; | |||||
| code << "\t\t}\n"; | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } else if (input_data_type != kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) { | |||||
| code.CodeFunction("Float32ToInt32", input_tensor_, output_tensor_, data_num); | |||||
| } else if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeFloat16) { | |||||
| code.CodeFunction("Float32ToFp16", input_tensor_, output_tensor_, data_num); | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Unsupported datatype from " << input_data_type << " to " << output_data_type; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } else { | |||||
| switch (input_data_type) { | |||||
| case kNumberTypeUInt8: | |||||
| code.CodeFunction("Uint8ToFloat32", input_tensor_, output_tensor_, data_num); | |||||
| break; | |||||
| case kNumberTypeInt32: | |||||
| code.CodeFunction("Int32ToFloat32", input_tensor_, output_tensor_, data_num); | |||||
| break; | |||||
| case kNumberTypeFloat16: | |||||
| code.CodeFunction("Fp16ToFloat32", input_tensor_, output_tensor_, data_num); | |||||
| break; | |||||
| default: | |||||
| MS_LOG(ERROR) << "Unsupported input data type " << input_data_type; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Cast, CPUOpCoderCreator<DTypeCastCoder>) | |||||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Cast, CPUOpCoderCreator<DTypeCastCoder>) | |||||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeUInt8, PrimitiveType_Cast, CPUOpCoderCreator<DTypeCastCoder>) | |||||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Cast, CPUOpCoderCreator<DTypeCastCoder>) | |||||
| } // namespace mindspore::lite::micro | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DTYPE_CAST_CODER_H | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DTYPE_CAST_CODER_H | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "nnacl/int8/quant_dtype_cast_int8.h" | |||||
| namespace mindspore::lite::micro { | |||||
| class DTypeCastCoder : public OperatorCoder { | |||||
| public: | |||||
| DTypeCastCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) | |||||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||||
| ~DTypeCastCoder() override = default; | |||||
| int Prepare(CoderContext *const context) override; | |||||
| int DoCode(CoderContext *const context) override; | |||||
| private: | |||||
| uint32_t stride_{0}; | |||||
| uint32_t data_num_{0}; | |||||
| }; | |||||
| } // namespace mindspore::lite::micro | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DTYPE_CAST_CODER_H | |||||
| @@ -0,0 +1,33 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/base/full_connection_base_coder.h" | |||||
| namespace mindspore::lite::micro { | |||||
| FullConnectionBaseCoder::~FullConnectionBaseCoder() { fc_param_ = nullptr; } | |||||
| int FullConnectionBaseCoder::Init() { | |||||
| this->fc_param_ = reinterpret_cast<MatMulParameter *>(parameter_); | |||||
| filter_tensor_ = input_tensors_.at(kWeightIndex); | |||||
| MS_CHECK_PTR(filter_tensor_); | |||||
| if (input_tensors_.size() == kInputSize2) { | |||||
| bias_tensor_ = input_tensors_.at(kBiasIndex); | |||||
| MS_CHECK_PTR(bias_tensor_); | |||||
| MS_CHECK_PTR(bias_tensor_->data_c()); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::lite::micro | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_FULLY_CONNECTED_BASE_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_FULLY_CONNECTED_BASE_CODER_H_ | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "nnacl/matmul_parameter.h" | |||||
| namespace mindspore::lite::micro { | |||||
| class FullConnectionBaseCoder : public OperatorCoder { | |||||
| public: | |||||
| FullConnectionBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) | |||||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||||
| ~FullConnectionBaseCoder() override; | |||||
| int Init(); | |||||
| protected: | |||||
| MatMulParameter *fc_param_{nullptr}; | |||||
| Tensor *filter_tensor_{nullptr}; | |||||
| Tensor *bias_tensor_{nullptr}; | |||||
| int thread_count_{0}; | |||||
| int thread_stride_{0}; | |||||
| }; | |||||
| } // namespace mindspore::lite::micro | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_FULLY_CONNECTED_BASE_CODER_H_ | |||||
| @@ -0,0 +1,74 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <string> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| #include "micro/coder/opcoders/base/quant_dtype_cast_coder.h" | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| using mindspore::schema::PrimitiveType_QuantDTypeCast; | |||||
| namespace mindspore::lite::micro { | |||||
| int QuantDTypeCastCoder::Prepare(CoderContext *const context) { | |||||
| this->cast_param_ = reinterpret_cast<QuantDTypeCastParameter *>(parameter_); | |||||
| if (cast_param_->srcT == kNumberTypeFloat32 && cast_param_->dstT == kNumberTypeInt8) { | |||||
| if (input_tensor_->data_type() != kNumberTypeFloat32 || output_tensor_->data_type() != kNumberTypeInt8) { | |||||
| MS_LOG(ERROR) << "cast_param_ data type and tensor data type do not match."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| inverse_ = false; | |||||
| } else if (cast_param_->srcT == kNumberTypeInt8 && cast_param_->dstT == kNumberTypeFloat32) { | |||||
| if (input_tensor_->data_type() != kNumberTypeInt8 || output_tensor_->data_type() != kNumberTypeFloat32) { | |||||
| MS_LOG(ERROR) << "cast_param_ data type and tensor data type do not match."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| inverse_ = true; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "cast_param_ data type not supported:" | |||||
| << " src: " << cast_param_->srcT << " dst: " << cast_param_->dstT; | |||||
| return RET_PARAM_INVALID; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int QuantDTypeCastCoder::DoCode(CoderContext *const context) { | |||||
| // get quant params | |||||
| QuantArg in_quant_arg = input_tensor_->quant_params().at(0); | |||||
| // single thread for now | |||||
| int num_unit_thread = input_tensor_->ElementsNum(); | |||||
| // generate code .h .c | |||||
| Collect(context, {"nnacl/int8/quant_dtype_cast_int8.h"}, {"quant_dtype_cast_int8.c"}); | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| std::string function = inverse_ ? "DoDequantizeInt8ToFp32" : "DoQuantizeFp32ToInt8"; | |||||
| code.CodeFunction(function, input_tensor_, output_tensor_, in_quant_arg.scale, in_quant_arg.zeroPoint, | |||||
| num_unit_thread); | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
// Register the coder for both float32 and int8 entry dtypes on all targets.
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_QuantDTypeCast,
                   CPUOpCoderCreator<QuantDTypeCastCoder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_QuantDTypeCast, CPUOpCoderCreator<QuantDTypeCastCoder>)
| } // namespace mindspore::lite::micro | |||||
| @@ -0,0 +1,49 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_QUANT_DTYPE_CAST_CODER_H | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_QUANT_DTYPE_CAST_CODER_H | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "nnacl/int8/quant_dtype_cast_int8.h" | |||||
| namespace mindspore::lite::micro { | |||||
// Micro coder that emits code for QuantDTypeCast: fp32 -> int8 quantize or
// int8 -> fp32 dequantize, selected by the operator's src/dst types.
class QuantDTypeCastCoder : public OperatorCoder {
 public:
  QuantDTypeCastCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                      const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
  ~QuantDTypeCastCoder() override = default;
  int Prepare(CoderContext *const context) override;
  int DoCode(CoderContext *const context) override;
 private:
  // Operator parameter, cast from the generic parameter_ in Prepare().
  QuantDTypeCastParameter *cast_param_{nullptr};
  // NOTE(review): the members below appear unused by the visible .cc — confirm
  // before removing (inputs_/outputs_/thread_* are never referenced here).
  std::vector<Tensor *> inputs_;
  std::vector<Tensor *> outputs_;
  // true: int8 -> fp32 (dequantize); false: fp32 -> int8 (quantize).
  bool inverse_{false};
  int thread_num_{0};
  int thread_n_num_{0};
  int thread_n_stride_{0};
  int num_unit_{0};
};
| } // namespace mindspore::lite::micro | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_QUANT_DTYPE_CAST_CODER_H | |||||
| @@ -0,0 +1,145 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/base/reduce_base_coder.h" | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| namespace mindspore::lite::micro { | |||||
| namespace { | |||||
| constexpr size_t kInputNum = 1; | |||||
| constexpr size_t kOutputNum = 1; | |||||
| } // namespace | |||||
| int ReduceBaseCoder::CheckInputsOutputs() { | |||||
| if (input_tensors_.size() < kInputNum) { | |||||
| MS_LOG(ERROR) << "Reduce inputs size should be at least " << kInputNum << " but got " << input_tensors_.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (output_tensors_.size() != kOutputNum) { | |||||
| MS_LOG(ERROR) << "Reduce outputs size should be " << kOutputNum << " but got " << output_tensors_.size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
// Validates and normalizes the reduce axes in-place:
//  - rejects num_axes_ > input rank and out-of-range axes,
//  - maps negative axes to their positive equivalents,
//  - expands reduce_to_end_ into an explicit contiguous axis list,
//  - num_axes_ == 0 means "reduce over all axes".
// Order matters: reduce_to_end_ expansion relies on axes_[0] already being
// normalized to a non-negative index.
int ReduceBaseCoder::CheckParameters() {
  size_t input_rank = input_tensor_->shape().size();
  if (static_cast<size_t>(num_axes_) > input_rank) {
    MS_LOG(ERROR) << "Reduce op invalid num of reduce axes " << num_axes_ << " larger than input rank " << input_rank;
    return RET_ERROR;
  }
  for (auto i = 0; i < num_axes_; i++) {
    if (axes_[i] < -static_cast<int>(input_rank) || axes_[i] >= static_cast<int>(input_rank)) {
      MS_LOG(ERROR) << "Reduce got invalid axis " << axes_[i] << ", axis should be in ["
                    << -static_cast<int>(input_rank) << ", " << input_rank - 1 << "].";
      return RET_ERROR;
    }
    // Normalize negative axis to [0, rank).
    if (axes_[i] < 0) {
      axes_[i] += static_cast<int>(input_rank);
    }
  }
  if (reduce_to_end_) {
    // actual num of axes to reduce
    num_axes_ = static_cast<int>(input_rank) - axes_[0];
    MS_CHECK_TRUE(num_axes_ <= MAX_SHAPE_SIZE, "invalid num_axes_, greater than 8.");
    // Fill in the contiguous run of axes from axes_[0] to the last dim.
    for (auto i = 1; i < num_axes_; ++i) {
      axes_[i] = axes_[0] + i;
    }
  }
  if (num_axes_ == 0) {
    // No axes given: reduce across every dimension.
    for (size_t i = 0; i < input_rank; i++) {
      axes_[i] = i;
    }
    num_axes_ = static_cast<int>(input_rank);
  }
  return RET_OK;
}
| int ReduceBaseCoder::Init() { | |||||
| auto reduce_param = reinterpret_cast<ReduceParameter *>(parameter_); | |||||
| if (reduce_param == nullptr) { | |||||
| return RET_NULL_PTR; | |||||
| } | |||||
| if (input_tensors_.size() > 1) { | |||||
| Tensor *axes_ptr = input_tensors_.at(1); | |||||
| num_axes_ = axes_ptr->ElementsNum(); | |||||
| MS_CHECK_PTR(axes_ptr->MutableData()); | |||||
| MS_CHECK_RET_CODE(memcpy_s(axes_, sizeof(axes_), axes_ptr->MutableData(), axes_ptr->Size()), "memcpy_s failed"); | |||||
| } else { | |||||
| num_axes_ = reduce_param->num_axes_; | |||||
| MS_CHECK_RET_CODE(memcpy_s(axes_, sizeof(axes_), reduce_param->axes_, sizeof(reduce_param->axes_)), | |||||
| "memcpy_s failed!"); | |||||
| } | |||||
| mode_ = reduce_param->mode_; | |||||
| MS_CHECK_RET_CODE(memcpy_s(axes_, sizeof(axes_), reduce_param->axes_, sizeof(reduce_param->axes_)), | |||||
| "memcpy_s failed!"); | |||||
| reduce_to_end_ = reduce_param->reduce_to_end_; | |||||
| MS_CHECK_RET_CODE(CheckInputsOutputs(), "CheckInputsOutputs failed!"); | |||||
| return RET_OK; | |||||
| } | |||||
| void ReduceBaseCoder::CalculateInnerOuterSize() { | |||||
| outer_sizes_.clear(); | |||||
| inner_sizes_.clear(); | |||||
| axis_sizes_.clear(); | |||||
| std::vector<int> tmp_shape = input_tensors_.at(0)->shape(); | |||||
| for (int i = 0; i < num_axes_; ++i) { | |||||
| int axis = axes_[i]; | |||||
| int outer_size = 1; | |||||
| for (int j = 0; j < axis; j++) { | |||||
| outer_size *= tmp_shape.at(j); | |||||
| } | |||||
| outer_sizes_.emplace_back(outer_size); | |||||
| int inner_size = 1; | |||||
| for (int k = axis + 1; k < static_cast<int>(tmp_shape.size()); k++) { | |||||
| inner_size *= tmp_shape.at(k); | |||||
| } | |||||
| inner_sizes_.emplace_back(inner_size); | |||||
| axis_sizes_.emplace_back(tmp_shape[axis]); | |||||
| tmp_shape[axis] = 1; | |||||
| } | |||||
| } | |||||
| void ReduceBaseCoder::CalculateTmpBufferSize() { | |||||
| buffer_sizes_.clear(); | |||||
| std::vector<int> input_shape = input_tensor_->shape(); | |||||
| for (int i = 0; i < num_axes_; i++) { | |||||
| int axis = axes_[i]; | |||||
| size_t size = 1; | |||||
| for (int j = 0; j < static_cast<int>(input_shape.size()); j++) { | |||||
| if (axis != j) { | |||||
| size *= input_shape.at(j); | |||||
| } | |||||
| } | |||||
| buffer_sizes_.emplace_back(size); | |||||
| input_shape[axis] = 1; | |||||
| } | |||||
| } | |||||
| int ReduceBaseCoder::ReSize() { | |||||
| int ret = CheckParameters(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| CalculateTmpBufferSize(); | |||||
| CalculateInnerOuterSize(); | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::lite::micro | |||||
| @@ -0,0 +1,59 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_BASE_CODER_H | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_BASE_CODER_H | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "nnacl/reduce_parameter.h" | |||||
| namespace mindspore::lite::micro { | |||||
// Shared base for reduce operator coders: validates inputs, normalizes reduce
// axes, and precomputes buffer/stride sizes used by derived coders.
class ReduceBaseCoder : public OperatorCoder {
 public:
  ReduceBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
  ~ReduceBaseCoder() override = default;
  // Reads axes/mode from the axes tensor or the ReduceParameter.
  int Init();
  // Re-validates parameters and recomputes the derived sizes below.
  virtual int ReSize();
 private:
  int CheckInputsOutputs();
  int CheckParameters();
 protected:
  // Normalized (non-negative) reduce axes, valid entries in [0, num_axes_).
  int axes_[MAX_SHAPE_SIZE]{};
  int num_axes_{0};
  // Reduce mode (mean/sum/...), copied from ReduceParameter::mode_.
  int mode_{0};
  bool reduce_to_end_{false};
 protected:
  void CalculateTmpBufferSize();
  void CalculateInnerOuterSize();
  // Per-axis intermediate buffer element counts and outer/axis/inner extents.
  std::vector<size_t> buffer_sizes_;
  std::vector<int> outer_sizes_;
  std::vector<int> inner_sizes_;
  std::vector<int> axis_sizes_;
  int outer_size_{0};
  int inner_size_{0};
  int axis_size_{0};
};
| } // namespace mindspore::lite::micro | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_BASE_CODER_H | |||||
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/base/softmax_base_coder.h" | |||||
| #include <vector> | |||||
| #include <type_traits> | |||||
| namespace mindspore::lite::micro { | |||||
| int SoftmaxBaseCoder::Init() { | |||||
| this->softmax_param_ = reinterpret_cast<SoftmaxParameter *>(parameter_); | |||||
| std::vector<int> in_shape = input_tensor_->shape(); | |||||
| size_t in_dims = in_shape.size(); | |||||
| MS_CHECK_TRUE(in_dims < std::extent<decltype(softmax_param_->input_shape_)>::value, | |||||
| "in_dims should be less than input_shape_ size"); | |||||
| int ele_size = 1; | |||||
| softmax_param_->n_dim_ = in_dims; | |||||
| for (int i = 0; i < static_cast<int>(in_dims); i++) { | |||||
| softmax_param_->input_shape_[i] = in_shape.at(i); | |||||
| ele_size *= in_shape.at(i); | |||||
| } | |||||
| softmax_param_->element_size_ = ele_size; | |||||
| return RET_OK; | |||||
| } | |||||
| int SoftmaxBaseCoder::ReSize() { | |||||
| std::vector<int> in_shape = input_tensor_->shape(); | |||||
| size_t in_dims = in_shape.size(); | |||||
| MS_CHECK_TRUE(in_dims < std::extent<decltype(softmax_param_->input_shape_)>::value, | |||||
| "in_dims should be less than input_shape_ size"); | |||||
| int ele_size = 1; | |||||
| softmax_param_->n_dim_ = in_dims; | |||||
| if (softmax_param_->axis_ == -1) { | |||||
| softmax_param_->axis_ += in_dims; | |||||
| } | |||||
| for (size_t i = 0; i < in_dims; i++) { | |||||
| softmax_param_->input_shape_[i] = in_shape.at(i); | |||||
| ele_size *= in_shape.at(i); | |||||
| } | |||||
| softmax_param_->element_size_ = ele_size; | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::lite::micro | |||||
| @@ -0,0 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_SOFTMAX_BASE_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_SOFTMAX_BASE_CODER_H_ | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "nnacl/softmax_parameter.h" | |||||
| #include "nnacl/int8/quantize.h" | |||||
| namespace mindspore::lite::micro { | |||||
| using std::string; | |||||
// Shared base for softmax coders: caches shape/element-count information in
// the SoftmaxParameter for derived fp32/int8 implementations.
class SoftmaxBaseCoder : public OperatorCoder {
 public:
  SoftmaxBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                   const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
  // Non-owning: softmax_param_ aliases parameter_, so only reset the pointer.
  ~SoftmaxBaseCoder() override { softmax_param_ = nullptr; }
 protected:
  // Fills softmax_param_ from the input tensor's shape.
  int Init();
  int ReSize();
  SoftmaxParameter *softmax_param_{nullptr};
  int thread_count_{0};
  SoftmaxQuantArg quant_params_{};
};
| } // namespace mindspore::lite::micro | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_SOFTMAX_BASE_CODER_H_ | |||||
| @@ -0,0 +1,89 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/add_int8_coder.h" | |||||
| #include <algorithm> | |||||
| #include <limits> | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "nnacl/arithmetic.h" | |||||
| #include "nnacl/int8/quantize.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| #include "micro/coder/log.h" | |||||
| using mindspore::schema::PrimitiveType_Add; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Derives the CMSIS-NN fixed-point parameters for element-wise int8 add:
// offsets (negated zero points), per-input multipliers/shifts normalized by
// twice the larger input scale, and the output rescale multiplier.
int AddInt8Coder::Prepare(CoderContext *const context) {
  input1_ = input_tensors_.at(0);
  input2 = input_tensors_.at(1);
  MS_CHECK_PTR(input1_);
  MS_CHECK_PTR(input2);
  MS_CHECK_TRUE(!input1_->quant_params().empty(), "input1_ quant_params is empty");
  MS_CHECK_TRUE(!input2->quant_params().empty(), "input2_ quant_params is empty");
  MS_CHECK_TRUE(!output_tensor_->quant_params().empty(), "output quant_params is empty");
  // CMSIS-NN expects offsets as negated zero points for the inputs.
  input_1_offset_ = -input1_->quant_params().at(0).zeroPoint;
  input_2_offset_ = -input2->quant_params().at(0).zeroPoint;
  out_offset_ = output_tensor_->quant_params().at(0).zeroPoint;
  const double input1_scale = input1_->quant_params().at(0).scale;
  const double input2_scale = input2->quant_params().at(0).scale;
  const double output_scale = output_tensor_->quant_params().at(0).scale;
  // Fixed pre-shift applied before the output rescale (headroom for the sum).
  left_shift_ = 20;
  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
  // Normalizing by twice the max scale keeps both multipliers in [0, 1].
  const double real_input1_multiplier = static_cast<double>(input1_scale) / twice_max_input_scale;
  const double real_input2_multiplier = static_cast<double>(input2_scale) / twice_max_input_scale;
  const double real_output_multiplier =
    twice_max_input_scale / ((1 << static_cast<size_t>(left_shift_)) * static_cast<double>(output_scale));
  // NOTE(review): the checks below are inclusive (0 <= x <= 1) while the
  // messages say "(0, 1)" — confirm which bound is intended.
  MS_CHECK_TRUE(0 <= real_input1_multiplier && real_input1_multiplier <= 1,
                "real_input1_multiplier should be in (0, 1)");
  QuantizeMultiplier(real_input1_multiplier, &input_1_mult_, &input_1_shift_);
  MS_CHECK_TRUE(0 <= real_input2_multiplier && real_input2_multiplier <= 1,
                "real_input2_multiplier should be in (0, 1)");
  QuantizeMultiplier(real_input2_multiplier, &input_2_mult_, &input_2_shift_);
  MS_CHECK_TRUE(0 <= real_output_multiplier && real_output_multiplier <= 1,
                "real_output_multiplier should be in (0, 1)");
  QuantizeMultiplier(real_output_multiplier, &out_mult_, &out_shift_);
  // Full int8 range: no fused activation clamp beyond the type limits.
  out_activation_min_ = std::numeric_limits<int8_t>::min();
  out_activation_max_ = std::numeric_limits<int8_t>::max();
  MS_CHECK_TRUE(input1_->ElementsNum() == input2->ElementsNum(), "tensor length not match");
  block_size_ = input1_->ElementsNum();
  return RET_OK;
}
| int AddInt8Coder::DoCode(CoderContext *const context) { | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_elementwise_add_s8.c"}); | |||||
| code.CodeFunction("arm_elementwise_add_s8", input1_, input2, input_1_offset_, input_1_mult_, input_1_shift_, | |||||
| input_2_offset_, input_2_mult_, input_2_shift_, left_shift_, output_tensor_, out_offset_, out_mult_, | |||||
| out_shift_, out_activation_min_, out_activation_max_, block_size_); | |||||
| MS_LOG(INFO) << "AddInt8Coder has been called"; | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Add, CPUOpCoderCreator<AddInt8Coder>) | |||||
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_ADD_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_ADD_INT8_CODER_H_ | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Coder for element-wise int8 addition via CMSIS-NN's arm_elementwise_add_s8.
class AddInt8Coder : public OperatorCoder {
 public:
  AddInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
               const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
  ~AddInt8Coder() override = default;
  int Prepare(CoderContext *const context) override;
  int DoCode(CoderContext *const context) override;
 private:
  // Non-owning views of the two operand tensors, set in Prepare().
  // NOTE(review): `input2` lacks the trailing underscore used by every other
  // member — rename (together with the .cc) for consistency.
  Tensor *input1_{nullptr};
  Tensor *input2{nullptr};
  // Negated input zero points (CMSIS-NN convention).
  int32_t input_1_offset_{0};
  // Fixed-point multiplier/shift pairs produced by QuantizeMultiplier.
  int32_t input_1_mult_{0};
  int32_t input_1_shift_{0};
  int32_t input_2_offset_{0};
  int32_t input_2_mult_{0};
  int32_t input_2_shift_{0};
  // Pre-shift applied before output rescaling (set to 20 in Prepare()).
  int32_t left_shift_{0};
  int32_t out_offset_{0};
  int32_t out_mult_{0};
  int32_t out_shift_{0};
  // Output clamp range; full int8 range when no fused activation.
  int32_t out_activation_min_{0};
  int32_t out_activation_max_{0};
  // Number of elements processed per call (both inputs must match).
  uint32_t block_size_{0};
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_ADD_INT8_CODER_H_ | |||||
| @@ -0,0 +1,60 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.h" | |||||
| #include "nnacl/int8/quantize.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Builds the per-output-channel multiplier/shift arrays CMSIS-NN expects from
// the conv quant args. Arrays are malloc'ed here and freed in the destructor.
int Conv2DBaseCoder::SetQuantArgs() {
  int channel = output_tensor_->Channel();
  size_t channel_data_size = static_cast<size_t>(channel) * sizeof(int32_t);
  output_mult_ = reinterpret_cast<int32_t *>(malloc(channel_data_size));
  MS_CHECK_PTR(output_mult_);
  output_shift_ = reinterpret_cast<int32_t *>(malloc(channel_data_size));
  MS_CHECK_PTR(output_shift_);
  const ::QuantArg *filter_quant_args = conv_quant_arg_->filter_quant_args_;
  auto input_scale = static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_);
  auto output_scale = static_cast<double>(conv_quant_arg_->output_quant_args_[0].scale_);
  int32_t significand;
  int channel_shift;
  if (conv_quant_arg_->filter_arg_num_ > 1) {
    // Per-channel filter quantization: compute one multiplier/shift per
    // output channel from that channel's filter scale.
    for (int i = 0; i < channel; ++i) {
      // One filter quant arg per output channel is required here.
      MS_CHECK_TRUE(conv_quant_arg_->filter_arg_num_ == static_cast<size_t>(channel), "quant num not match");
      const auto filter_scale = static_cast<double>(filter_quant_args[i].scale_);
      const double effective_output_scale = input_scale * filter_scale / output_scale;
      QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
      output_mult_[i] = significand;
      output_shift_[i] = channel_shift;
    }
  } else {
    // Per-tensor quantization: compute once and broadcast the single
    // multiplier/shift along the quantization dimension (channels_out).
    const auto filter_scale = static_cast<double>(filter_quant_args[0].scale_);
    const double effective_output_scale = input_scale * filter_scale / output_scale;
    QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
    for (int i = 0; i < channel; ++i) {
      output_mult_[i] = significand;
      output_shift_[i] = channel_shift;
    }
  }
  return RET_OK;
}
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_CONV2D_CMSIS_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_CONV2D_CMSIS_CODER_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/base/conv2d_base_coder.h" | |||||
| #include "nnacl/conv_parameter.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
| class Conv2DBaseCoder : public micro::Conv2DBaseCoder { | |||||
| public: | |||||
| explicit Conv2DBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) | |||||
| : micro::Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {} | |||||
| ~Conv2DBaseCoder() override { | |||||
| free(output_mult_); | |||||
| free(output_shift_); | |||||
| } | |||||
| protected: | |||||
| int SetQuantArgs(); | |||||
| int32_t *output_mult_{nullptr}; | |||||
| int32_t *output_shift_{nullptr}; | |||||
| }; | |||||
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_CONV2D_CMSIS_CODER_H_ | |||||
| @@ -0,0 +1,163 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.h" | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.h" | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| using mindspore::schema::PrimitiveType_Conv2D; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
| int Conv2DInt8Coder::Prepare(CoderContext *const context) { | |||||
| Conv2DBaseCoder::Init(); | |||||
| MS_CHECK_RET_CODE(micro::Conv2DBaseCoder::CheckLayout(input_tensor_), "CheckLayout failed"); | |||||
| MS_CHECK_RET_CODE(micro::Conv2DBaseCoder::SetQuantParam(), "SetQuantParam failed"); | |||||
| MS_CHECK_RET_CODE(Conv2DBaseCoder::SetQuantArgs(), "SetQuantArgs failed"); | |||||
| MS_CHECK_RET_CODE(SetParameters(), "SetParameters failed"); | |||||
| CheckSupportOptimize(); | |||||
| MS_CHECK_RET_CODE(InitTmpBuffer(), "InitTmpBuffer failed"); | |||||
| return RET_OK; | |||||
| } | |||||
// Emits the call to the CMSIS-NN conv kernel chosen by CheckSupportOptimize(),
// plus the per-channel shift/multiplier arrays and (if needed) a scratch
// buffer memset. The generated function argument orders mirror the CMSIS-NN
// arm_convolve_* signatures.
int Conv2DInt8Coder::DoCode(CoderContext *const context) {
  Serializer code;
  code.precision(kPrecision);
  std::vector<string> h_files;
  std::vector<string> c_files;
  h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h");
  string buffer_str = "NULL";
  if (opt_ != Convolve_1x1_fast) {
    // Basic and 1-x-n variants need a zeroed im2col scratch buffer.
    buffer_str = allocator_->GetRuntimeAddr(buffer_);
    code << "    memset(" << buffer_str << ", 0, " << buffer_size_ << ");\n";
  }
  code.CodeArray("output_shift", output_shift_, output_ch_);
  code.CodeArray("output_mult", output_mult_, output_ch_);
  switch (opt_) {
    case Basic:
      c_files = {"arm_convolve_s8.c", "arm_nn_mat_mult_kernel_s8_s16.c", "arm_q7_to_q15_with_offset.c"};
      Collect(context, h_files, c_files);
      code.CodeFunction("arm_convolve_s8", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, filter_tensor_,
                        output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
                        output_tensor_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
                        out_activation_max_, output_x_, output_y_, buffer_str);
      break;
    case Convolve_1_x_n:
      c_files = {"arm_convolve_1_x_n_s8.c", "arm_nn_mat_mul_core_1x_s8.c"};
      Collect(context, h_files, c_files);
      code.CodeFunction("arm_convolve_1_x_n_s8", input_tensor_, input_x_, input_ch_, input_batches_, filter_tensor_,
                        output_ch_, kernel_x_, pad_x_, stride_x_, bias_tensor_, output_tensor_, "output_shift",
                        "output_mult", out_offset_, input_offset_, out_activation_min_, out_activation_max_, output_x_,
                        buffer_str);
      break;
    case Convolve_1x1_fast:
      c_files = {"arm_convolve_1x1_s8_fast.c", "arm_nn_mat_mult_nt_t_s8.c", "arm_nn_mat_mul_core_4x_s8.c",
                 "arm_nn_mat_mul_core_1x_s8.c"};
      Collect(context, h_files, c_files);
      code.CodeFunction("arm_convolve_1x1_s8_fast", input_tensor_, input_x_, input_y_, input_ch_, input_batches_,
                        filter_tensor_, output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_,
                        "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
                        out_activation_max_, output_x_, output_y_, buffer_str);
      break;
    default:
      MS_LOG(ERROR) << "opt enum value is not defined";
      return RET_ERROR;
  }
  context->AppendCode(code.str());
  return RET_OK;
}
// Extracts geometry and quantization parameters for the generated conv call.
// Filter layout is assumed OHWI: dim 0 = out channels, 1 = kernel height,
// 2 = kernel width, 3 = in channels (see the DimensionSize uses below).
int Conv2DInt8Coder::SetParameters() {
  MS_CHECK_TRUE(input_tensor_->Channel() == filter_tensor_->DimensionSize(3),
                "input Channel and filter size not match!");
  MS_CHECK_TRUE(output_tensor_->Channel() == filter_tensor_->DimensionSize(0),
                "output Channel and filter size not match!");
  input_x_ = input_tensor_->Width();
  input_y_ = input_tensor_->Height();
  input_ch_ = input_tensor_->Channel();
  input_batches_ = input_tensor_->Batch();
  kernel_x_ = filter_tensor_->DimensionSize(2);
  kernel_y_ = filter_tensor_->DimensionSize(1);
  pad_x_ = conv_param_->pad_l_;
  pad_y_ = conv_param_->pad_u_;
  stride_x_ = conv_param_->stride_w_;
  stride_y_ = conv_param_->stride_h_;
  MS_CHECK_TRUE(!input_tensor_->quant_params().empty(), "input quant_params is empty");
  MS_CHECK_TRUE(!output_tensor_->quant_params().empty(), "output quant_params is empty");
  QuantArg input_quant_arg = input_tensor_->quant_params().at(0);
  QuantArg output_quant_arg = output_tensor_->quant_params().at(0);
  // CMSIS-NN convention: input offset is the negated zero point.
  input_offset_ = -input_quant_arg.zeroPoint;
  out_offset_ = output_quant_arg.zeroPoint;
  output_x_ = output_tensor_->DimensionSize(2);
  output_y_ = output_tensor_->DimensionSize(1);
  output_ch_ = output_tensor_->Channel();
  // Clamp range depends on the fused activation (none/ReLU/ReLU6).
  CalculateActivationRangeQuantized(conv_param_->act_type_ == ActType_Relu, conv_param_->act_type_ == ActType_Relu6,
                                    output_quant_arg.zeroPoint, static_cast<float>(output_quant_arg.scale),
                                    &out_activation_min_, &out_activation_max_);
  return RET_OK;
}
| void Conv2DInt8Coder::CheckSupportOptimize() { | |||||
| if ((pad_x_ == 0) && (pad_y_ == 0) && (input_ch_ % 4 == 0) && (stride_x_ == 1) && (stride_y_ == 1) && | |||||
| (kernel_x_ == 1) && (kernel_y_ == 1)) { | |||||
| opt_ = Convolve_1x1_fast; | |||||
| return; | |||||
| } | |||||
| if ((output_x_ == 1) && (input_x_ == 1) && (kernel_y_ == 1) && (output_x_ % 4 == 0) && (input_batches_ == 1)) { | |||||
| opt_ = Convolve_1_x_n; | |||||
| return; | |||||
| } | |||||
| opt_ = Basic; | |||||
| } | |||||
| int Conv2DInt8Coder::InitTmpBuffer() { | |||||
| switch (opt_) { | |||||
| case Basic: | |||||
| buffer_size_ = | |||||
| (2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * (int32_t)sizeof(int16_t); | |||||
| break; | |||||
| case Convolve_1_x_n: | |||||
| buffer_size_ = | |||||
| (2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * sizeof(int16_t); | |||||
| break; | |||||
| case Convolve_1x1_fast: | |||||
| // do nothing | |||||
| buffer_size_ = 0; | |||||
| return RET_OK; | |||||
| default: | |||||
| MS_LOG(ERROR) << "opt enum value is not defined"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| buffer_ = static_cast<int16_t *>(allocator_->Malloc(kNumberTypeInt16, buffer_size_, kWorkspace)); | |||||
| MS_CHECK_PTR(buffer_); | |||||
| return RET_OK; | |||||
| } | |||||
// Register this coder as the int8 Conv2D code generator for Cortex-M (kARM32M) targets.
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Conv2D, CPUOpCoderCreator<Conv2DInt8Coder>)
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,72 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_CONV2D_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_CONV2D_INT8_CODER_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.h" | |||||
| #include "nnacl/conv_parameter.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Generates calls to the CMSIS-NN int8 convolution kernels (arm_convolve_*_s8)
// for Cortex-M (kARM32M) targets.
class Conv2DInt8Coder : public Conv2DBaseCoder {
 public:
  explicit Conv2DInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                           const Model::Node *node, size_t node_index, Target target)
      : Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  int Prepare(CoderContext *context) override;

  int DoCode(CoderContext *ctx) override;

  ~Conv2DInt8Coder() override = default;

 private:
  // Which CMSIS-NN convolution variant DoCode() will emit.
  enum ConvOpt { Basic = 0, Convolve_1_x_n = 1, Convolve_1x1_fast = 2 };

  // Picks the fastest CMSIS-NN kernel the current geometry allows (sets opt_).
  void CheckSupportOptimize();
  // Fills the geometry/quantization fields below from the tensors and conv_param_.
  int SetParameters();
  // Allocates the int16 im2col workspace required by the chosen kernel.
  int InitTmpBuffer();

  // Input geometry (NHWC).
  uint16_t input_x_{0};
  uint16_t input_y_{0};
  uint16_t input_ch_{0};
  uint16_t input_batches_{0};
  // Filter geometry and convolution attributes.
  uint16_t output_ch_{0};
  uint16_t kernel_x_{0};
  uint16_t kernel_y_{0};
  uint16_t pad_x_{0};
  uint16_t pad_y_{0};
  uint16_t stride_x_{0};
  uint16_t stride_y_{0};
  // Quantization: negated input zero-point, output zero-point, clamp range.
  int32_t out_offset_{0};
  int32_t input_offset_{0};
  int32_t out_activation_min_{0};
  int32_t out_activation_max_{0};
  // Output geometry.
  uint16_t output_x_{0};
  uint16_t output_y_{0};
  // Workspace allocated by InitTmpBuffer().
  int16_t *buffer_{nullptr};
  int32_t buffer_size_{0};
  ConvOpt opt_{ConvOpt::Basic};
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_CONV2D_INT8_CODER_H_ | |||||
| @@ -0,0 +1,158 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.h" | |||||
| #include <string> | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| #include "micro/coder/log.h" | |||||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
int DWConvInt8Coder::Prepare(CoderContext *const context) {
  // Base-class init: shared conv bookkeeping.
  // NOTE(review): the return value of Init() is ignored here — confirm it cannot fail.
  Conv2DBaseCoder::Init();
  MS_CHECK_RET_CODE(micro::Conv2DBaseCoder::CheckLayout(input_tensor_), "Check layout failed.");
  MS_CHECK_RET_CODE(micro::Conv2DBaseCoder::SetQuantParam(), "SetQuantParam failed");
  MS_CHECK_RET_CODE(Conv2DBaseCoder::SetQuantArgs(), "SetQuantArgs failed");
  MS_CHECK_RET_CODE(InitWeightBias(), "InitWeightBias failed");
  MS_CHECK_RET_CODE(SetParameters(), "SetParameters failed");
  // Must run after SetParameters() (needs geometry) and before InitTmpBuffer()
  // (it decides buffer_size_).
  CheckSupportOptimize();
  MS_CHECK_RET_CODE(InitTmpBuffer(), "InitTmpBuffer failed");
  return RET_OK;
}
| int DWConvInt8Coder::DoCode(CoderContext *const context) { | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| std::vector<std::string> h_files; | |||||
| std::vector<std::string> c_files; | |||||
| h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h"); | |||||
| code.CodeArray("output_shift", output_shift_, output_ch_); | |||||
| code.CodeArray("output_mult", output_mult_, output_ch_); | |||||
| switch (optimize_) { | |||||
| case Conv_3x3: | |||||
| c_files.emplace_back("arm_depthwise_conv_3x3_s8.c"); | |||||
| Collect(context, h_files, c_files); | |||||
| code.CodeFunction("arm_depthwise_conv_3x3_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_, | |||||
| output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_, "output_shift", | |||||
| "output_mult", output_x_, output_y_, output_offset_, input_offset_, output_activation_min_, | |||||
| output_activation_max_, dilation_x_, dilation_y_, "NULL"); | |||||
| break; | |||||
| case Conv_opt: | |||||
| // arm_depthwise_conv_s8_opt also depends on arm_depthwise_conv_s8 | |||||
| c_files.emplace_back("arm_depthwise_conv_s8.c"); | |||||
| c_files.emplace_back("arm_depthwise_conv_s8_opt.c"); | |||||
| Collect(context, h_files, c_files); | |||||
| code.CodeFunction("arm_depthwise_conv_s8_opt", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_, | |||||
| output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, | |||||
| output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_, | |||||
| input_offset_, output_activation_min_, output_activation_max_, dilation_x_, dilation_y_, | |||||
| "NULL"); | |||||
| break; | |||||
| case Basic: | |||||
| c_files.emplace_back("arm_depthwise_conv_s8.c"); | |||||
| Collect(context, h_files, c_files); | |||||
| code.CodeFunction("arm_depthwise_conv_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_, | |||||
| output_ch_, ch_mult_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, | |||||
| output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_, | |||||
| input_offset_, output_activation_min_, output_activation_max_, dilation_x_, dilation_y_, | |||||
| "NULL"); | |||||
| break; | |||||
| default: | |||||
| MS_LOG(ERROR) << "unsupported optimize_r"; | |||||
| break; | |||||
| } | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| int DWConvInt8Coder::InitWeightBias() { | |||||
| auto *origin_weight = reinterpret_cast<int8_t *>(filter_tensor_->data_c()); | |||||
| MS_CHECK_PTR(origin_weight); | |||||
| auto pack_weight_size = | |||||
| static_cast<size_t>(filter_tensor_->Batch() * filter_tensor_->Height() * filter_tensor_->Width()); | |||||
| packed_weight_ = | |||||
| static_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, pack_weight_size * sizeof(int8_t), kOfflinePackWeight)); | |||||
| MS_ASSERT(packed_weight_); | |||||
| PackNCHWToNHWCInt8(origin_weight, packed_weight_, 1, filter_tensor_->Height() * filter_tensor_->Width(), | |||||
| filter_tensor_->Batch()); | |||||
| return RET_OK; | |||||
| } | |||||
| int DWConvInt8Coder::SetParameters() { | |||||
| input_x_ = input_tensor_->Width(); | |||||
| input_y_ = input_tensor_->Height(); | |||||
| input_ch_ = input_tensor_->Channel(); | |||||
| output_ch_ = output_tensor_->Channel(); | |||||
| // depth_multiplier | |||||
| ch_mult_ = output_tensor_->Channel() / input_tensor_->Channel(); | |||||
| kernel_x_ = filter_tensor_->Width(); | |||||
| kernel_y_ = filter_tensor_->Height(); | |||||
| pad_y_ = conv_param_->pad_u_; | |||||
| pad_x_ = conv_param_->pad_l_; | |||||
| stride_y_ = conv_param_->stride_h_; | |||||
| stride_x_ = conv_param_->stride_w_; | |||||
| QuantArg input_quant_arg = input_tensor_->quant_params().at(0); | |||||
| QuantArg output_quant_arg = output_tensor_->quant_params().at(0); | |||||
| output_x_ = output_tensor_->Width(); | |||||
| output_y_ = output_tensor_->Height(); | |||||
| input_offset_ = -input_quant_arg.zeroPoint; | |||||
| output_offset_ = output_quant_arg.zeroPoint; | |||||
| CalculateActivationRangeQuantized(conv_param_->act_type_ == ActType_Relu, conv_param_->act_type_ == ActType_Relu6, | |||||
| output_quant_arg.zeroPoint, output_quant_arg.scale, &output_activation_min_, | |||||
| &output_activation_max_); | |||||
| return RET_OK; | |||||
| } | |||||
| void DWConvInt8Coder::CheckSupportOptimize() { | |||||
| if (ch_mult_ == 1) { | |||||
| if ((kernel_x_ == 3) && (kernel_y_ == 3) && (pad_y_ <= 1)) { | |||||
| optimize_ = Conv_3x3; | |||||
| buffer_size_ = 0; | |||||
| } else { | |||||
| optimize_ = Conv_opt; | |||||
| buffer_size_ = input_ch_ * kernel_x_ * kernel_y_ * sizeof(int16_t); | |||||
| } | |||||
| } else { | |||||
| optimize_ = Basic; | |||||
| buffer_size_ = 0; | |||||
| } | |||||
| } | |||||
| int DWConvInt8Coder::InitTmpBuffer() { | |||||
| if (buffer_size_ != 0) { | |||||
| buffer = static_cast<int16_t *>(allocator_->Malloc(kNumberTypeInt16, buffer_size_, kWorkspace)); | |||||
| MS_CHECK_PTR(buffer); | |||||
| } else { | |||||
| buffer = nullptr; | |||||
| } | |||||
| return 0; | |||||
| } | |||||
// Register this coder as the int8 DepthwiseConv2D code generator for Cortex-M (kARM32M) targets.
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_DepthwiseConv2D, CPUOpCoderCreator<DWConvInt8Coder>)
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,79 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_DWCONV_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_DWCONV_INT8_CODER_H_ | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.h" | |||||
| #include "src/runtime/kernel/arm/int8/convolution_depthwise_int8.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Generates CMSIS-NN int8 depthwise-convolution calls (arm_depthwise_conv_*_s8)
// for Cortex-M (kARM32M) targets.
class DWConvInt8Coder : public Conv2DBaseCoder {
 public:
  DWConvInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~DWConvInt8Coder() override = default;

  int Prepare(CoderContext *context) override;

  int DoCode(CoderContext *context) override;

 private:
  // Which CMSIS-NN depthwise kernel DoCode() will emit.
  enum DwConvOpt {
    Basic = 0,     // arm_depthwise_conv_s8 (handles ch_mult_ > 1)
    Conv_3x3 = 1,  // arm_depthwise_conv_3x3_s8
    Conv_opt = 2,  // arm_depthwise_conv_s8_opt (needs an int16 workspace)
  };

  // Fills the geometry/quantization fields below from the tensors and conv_param_.
  int SetParameters();
  // Chooses the kernel variant and the workspace size it requires.
  void CheckSupportOptimize();
  // Allocates the int16 workspace when buffer_size_ != 0.
  int InitTmpBuffer();
  // Repacks the depthwise filter into an offline-packed weight buffer.
  int InitWeightBias();

  // Geometry (NHWC input/output, filter width/height).
  int32_t input_x_{0};
  int32_t input_y_{0};
  int32_t input_ch_{0};
  int32_t output_ch_{0};
  int32_t ch_mult_{0};  // depth multiplier: output_ch_ / input_ch_
  int32_t kernel_x_{0};
  int32_t kernel_y_{0};
  int32_t pad_x_{0};
  int32_t pad_y_{0};
  int32_t stride_x_{0};
  int32_t stride_y_{0};
  int32_t output_x_{0};
  int32_t output_y_{0};
  // Quantization: output zero-point, negated input zero-point, clamp range.
  int32_t output_offset_{0};
  int32_t input_offset_{0};
  int32_t output_activation_min_{0};
  int32_t output_activation_max_{0};
  // NOTE(review): dilation_x_/dilation_y_ stay 0 — nothing assigns them before they are passed
  // to the CMSIS-NN calls; confirm the targeted CMSIS-NN version ignores these arguments.
  uint16_t dilation_x_{0};
  uint16_t dilation_y_{0};
  int8_t *packed_weight_{nullptr};
  DwConvOpt optimize_{Basic};
  size_t buffer_size_{0};
  // Workspace pointer (named without the trailing underscore, unlike the other members).
  int16_t *buffer{nullptr};
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_CMSIS_NN_DWCONV_INT8_CODER_H_ | |||||
| @@ -0,0 +1,73 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/fullconnection_int8_coder.h" | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| using mindspore::schema::PrimitiveType_FullConnection; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
int FullConnectionInt8Coder::Prepare(CoderContext *const context) {
  // Resolve base-class tensors/params, tag the output as NHWC, then derive the
  // quantized CMSIS-NN call arguments.
  // NOTE(review): the return value of Init() is ignored here — confirm it cannot fail.
  FullConnectionBaseCoder::Init();
  ConfigInputOutput();
  MS_CHECK_RET_CODE(SetParameters(), "SetParameters failed");
  return RET_OK;
}
// The generated fully-connected kernel produces NHWC data; tag the output tensor accordingly.
void FullConnectionInt8Coder::ConfigInputOutput() { output_tensor_->set_format(schema::Format_NHWC); }
| int FullConnectionInt8Coder::DoCode(CoderContext *const context) { | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_fully_connected_s8.c", "arm_nn_vec_mat_mult_t_s8.c"}); | |||||
| code.CodeFunction("arm_fully_connected_s8", input_tensor_, filter_tensor_, col_dim_, row_dim_, nb_batches_, | |||||
| input_offset_, filter_offset_, out_multiplier_, out_shift_, output_offset_, bias_tensor_, | |||||
| output_tensor_, output_activation_min_, output_activation_max_, "NULL"); | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| int FullConnectionInt8Coder::SetParameters() { | |||||
| MS_CHECK_TRUE(output_tensor_->shape().size() == 2, "output tensor size should be 2"); | |||||
| MS_CHECK_TRUE(!input_tensor_->quant_params().empty(), "input quant_params is empty"); | |||||
| MS_CHECK_TRUE(!filter_tensor_->quant_params().empty(), "filter quant_params is empty"); | |||||
| MS_CHECK_TRUE(!output_tensor_->quant_params().empty(), "output quant_params is empty"); | |||||
| QuantArg input_quant_arg = input_tensor_->quant_params().at(0); | |||||
| QuantArg filter_quant_arg = filter_tensor_->quant_params().at(0); | |||||
| QuantArg output_quant_arg = output_tensor_->quant_params().at(0); | |||||
| double real_multiplier = input_quant_arg.scale * filter_quant_arg.scale / output_quant_arg.scale; | |||||
| QuantizeMultiplier(real_multiplier, &out_multiplier_, &out_shift_); | |||||
| CalculateActivationRangeQuantized(fc_param_->act_type_ == ActType_Relu, fc_param_->act_type_ == ActType_Relu6, | |||||
| output_quant_arg.zeroPoint, output_quant_arg.scale, &output_activation_min_, | |||||
| &output_activation_max_); | |||||
| input_offset_ = -input_quant_arg.zeroPoint; | |||||
| filter_offset_ = -filter_quant_arg.zeroPoint; | |||||
| output_offset_ = output_quant_arg.zeroPoint; | |||||
| col_dim_ = filter_tensor_->DimensionSize(filter_tensor_->shape().size() - 1); | |||||
| row_dim_ = output_tensor_->DimensionSize(1); | |||||
| nb_batches_ = input_tensor_->Batch(); | |||||
| return RET_OK; | |||||
| } | |||||
// Register this coder as the int8 FullConnection code generator for Cortex-M (kARM32M) targets.
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_FullConnection, CPUOpCoderCreator<FullConnectionInt8Coder>)
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,55 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_FULLCONNECTION_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_FULLCONNECTION_INT8_CODER_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "micro/coder/opcoders/base/full_connection_base_coder.h" | |||||
| #include "nnacl/int8/quantize.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Generates a CMSIS-NN int8 fully-connected call (arm_fully_connected_s8)
// for Cortex-M (kARM32M) targets.
class FullConnectionInt8Coder : public FullConnectionBaseCoder {
 public:
  FullConnectionInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                          const Model::Node *node, size_t node_index, Target target)
      : FullConnectionBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

  ~FullConnectionInt8Coder() override = default;

 private:
  // Derives the call arguments below from tensor shapes and quantization metadata.
  int SetParameters();
  // Tags the output tensor as NHWC.
  void ConfigInputOutput();

  uint16_t col_dim_{0};     // per-sample input vector length
  uint16_t row_dim_{0};     // number of output units
  uint16_t nb_batches_{0};  // batch count
  int32_t input_offset_{0};    // negated input zero-point
  int32_t filter_offset_{0};   // negated filter zero-point
  int32_t out_multiplier_{0};  // fixed-point requantization multiplier
  int32_t out_shift_{0};       // fixed-point requantization shift
  int32_t output_offset_{0};   // output zero-point
  int32_t output_activation_min_{0};
  int32_t output_activation_max_{0};
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_FULLCONNECTION_INT8_CODER_H_ | |||||
| @@ -0,0 +1,73 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/mul_int8_coder.h" | |||||
| #include <string> | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "nnacl/int8/quantize.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| using mindspore::schema::PrimitiveType_Mul; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
int MulInt8Coder::Prepare(CoderContext *const context) {
  // Element-wise mul takes exactly two inputs; both must carry quantization metadata.
  input1_ = OperatorCoder::input_tensors().at(0);
  input2_ = OperatorCoder::input_tensors().at(1);
  MS_CHECK_PTR(input1_);
  MS_CHECK_PTR(input2_);
  MS_CHECK_TRUE(!input1_->quant_params().empty(), "input1_ quant_params is empty");
  MS_CHECK_TRUE(!input2_->quant_params().empty(), "input2_ quant_params is empty");
  MS_CHECK_TRUE(!output_tensor_->quant_params().empty(), "output quant_params is empty");
  // CMSIS-NN expects negated input zero-points and the raw output zero-point.
  input_1_offset_ = -input1_->quant_params().at(0).zeroPoint;
  input_2_offset_ = -input2_->quant_params().at(0).zeroPoint;
  out_offset_ = output_tensor_->quant_params().at(0).zeroPoint;
  const double input1_scale = input1_->quant_params().at(0).scale;
  const double input2_scale = input2_->quant_params().at(0).scale;
  const double output_scale = output_tensor_->quant_params().at(0).scale;
  // Fold the three scales into one fixed-point multiplier/shift requantization pair.
  const double real_multiplier = input1_scale * input2_scale / output_scale;
  QuantizeMultiplier(real_multiplier, &out_mult_, &out_shift_);
  // No fused activation here (both flags false): clamp only to the representable range.
  CalculateActivationRangeQuantized(false, false, out_offset_, output_scale, &out_activation_min_,
                                    &out_activation_max_);
  // Broadcasting is not supported: both inputs must have the same element count.
  MS_CHECK_TRUE(input1_->ElementsNum() == input2_->ElementsNum(), "tensor length not match");
  block_size_ = input1_->ElementsNum();
  return RET_OK;
}
| int MulInt8Coder::DoCode(CoderContext *const context) { | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_elementwise_mul_s8.c"}); | |||||
| code.CodeFunction("arm_elementwise_mul_s8", input1_, input2_, input_1_offset_, input_2_offset_, output_tensor_, | |||||
| out_offset_, out_mult_, out_shift_, out_activation_min_, out_activation_max_, block_size_); | |||||
| MS_LOG(INFO) << "MulInt8Coder has been called"; | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
// Register this coder as the int8 Mul code generator for Cortex-M (kARM32M) targets.
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Mul, CPUOpCoderCreator<MulInt8Coder>)
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,49 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_MUL_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_MUL_INT8_CODER_H_ | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Generates a CMSIS-NN int8 element-wise multiply call (arm_elementwise_mul_s8)
// for Cortex-M (kARM32M) targets. Both inputs must have the same element count.
class MulInt8Coder : public OperatorCoder {
 public:
  MulInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
               const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~MulInt8Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  Tensor *input1_{nullptr};
  Tensor *input2_{nullptr};
  int32_t input_1_offset_{0};  // negated zero-point of input1_
  int32_t input_2_offset_{0};  // negated zero-point of input2_
  int32_t out_offset_{0};      // output zero-point
  int32_t out_mult_{0};        // fixed-point requantization multiplier
  int32_t out_shift_{0};       // fixed-point requantization shift
  int32_t out_activation_min_{0};
  int32_t out_activation_max_{0};
  uint32_t block_size_{0};  // number of elements to process
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_MUL_INT8_CODER_H_ | |||||
| @@ -0,0 +1,102 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.h" | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| using mindspore::schema::PrimitiveType_Pooling; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
int PoolingInt8Coder::Prepare(CoderContext *const context) {
  this->pooling_parameter_ = reinterpret_cast<PoolingParameter *>(parameter_);
  // get tensors
  MS_CHECK_RET_CODE(SetParameters(), "SetParameters failed");
  if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) {
    // arm_avgpool_s8 needs a per-channel int32 accumulation scratch buffer; max pooling does not.
    buffer_size_ = input_tensor_->Channel() * sizeof(int32_t);
    buffer_ = static_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, buffer_size_, kWorkspace));
    MS_CHECK_PTR(buffer_);
  }
  return RET_OK;
}
| int PoolingInt8Coder::DoCode(CoderContext *const context) { | |||||
| // init struct PoolingParameters | |||||
| std::string buffer_str = "NULL"; | |||||
| std::string pooling_func; | |||||
| std::vector<std::string> cFiles; | |||||
| if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) { | |||||
| cFiles = {"arm_avgpool_s8.c"}; | |||||
| pooling_func = "arm_avgpool_s8"; | |||||
| buffer_str = allocator_->GetRuntimeAddr(buffer_); | |||||
| } else if (pooling_parameter_->pool_mode_ == PoolMode_MaxPool) { | |||||
| cFiles = {"arm_max_pool_s8.c"}; | |||||
| pooling_func = "arm_max_pool_s8"; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "unsupported pad mode"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, cFiles); | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| code.CodeFunction(pooling_func, "&nn_context", "&pool_params", "&input_dims", input_tensor_, "&filter_dims", | |||||
| "&output_dims", output_tensor_); | |||||
| code.CodeFunction(pooling_func, dim_src_height_, dim_src_width_, dim_dst_height_, dim_dst_width_, stride_height_, | |||||
| stride_width_, dim_kernel_height_, dim_kernel_width_, padding_height_, padding_width_, act_min_, | |||||
| act_max_, ch_src_, input_tensor_, buffer_str, output_tensor_); | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| int PoolingInt8Coder::SetParameters() { | |||||
| dim_src_height_ = input_tensor_->Height(); | |||||
| dim_src_width_ = input_tensor_->Width(); | |||||
| dim_dst_height_ = output_tensor_->DimensionSize(1); | |||||
| dim_src_width_ = output_tensor_->DimensionSize(2); | |||||
| ch_src_ = input_tensor_->Channel(); | |||||
| stride_height_ = pooling_parameter_->stride_h_; | |||||
| stride_width_ = pooling_parameter_->stride_w_; | |||||
| dim_kernel_height_ = pooling_parameter_->window_h_; | |||||
| dim_kernel_width_ = pooling_parameter_->window_w_; | |||||
| // only use pad_u_ and pad_l_ because their value is consistent with tf | |||||
| // ref: mindspore/lite/src/ops/conv2d.cc:ConvInferShape | |||||
| padding_height_ = pooling_parameter_->pad_u_; | |||||
| padding_width_ = pooling_parameter_->pad_l_; | |||||
| MS_CHECK_TRUE(!output_tensor_->quant_params().empty(), "output quant_params is empty"); | |||||
| QuantArg output_quant_arg = output_tensor_->quant_params().at(0); | |||||
| CalculateActivationRangeQuantized(pooling_parameter_->act_type_ == ActType_Relu, | |||||
| pooling_parameter_->act_type_ == ActType_Relu6, output_quant_arg.zeroPoint, | |||||
| output_quant_arg.scale, &act_min_, &act_max_); | |||||
| MS_CHECK_TRUE(input_tensor_->Channel() == output_tensor_->Channel(), | |||||
| "input Channel and output Channel size not match!"); | |||||
| return RET_OK; | |||||
| } | |||||
// Register this coder as the int8 Pooling code generator for Cortex-M (kARM32M) targets.
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Pooling, CPUOpCoderCreator<PoolingInt8Coder>)
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,63 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_POOLING_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_POOLING_INT8_CODER_H_ | |||||
| #include <string> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| #include "nnacl/int8/pooling_int8.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Generates CMSIS-NN pooling code for int8 tensors on Cortex-M (kARM32M)
// targets.  SetParameters() extracts geometry, stride, padding and the
// quantized activation range before DoCode() emits the kernel call.
class PoolingInt8Coder final : public OperatorCoder {
 public:
  PoolingInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                   const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~PoolingInt8Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  // Reads tensor shapes and PoolingParameter into the fields below.
  int SetParameters();

  int dim_src_height_{0};    // input height
  int dim_src_width_{0};     // input width
  int dim_dst_height_{0};    // output height (DimensionSize(1))
  // NOTE(review): SetParameters appears to store the output width
  // (DimensionSize(2)) into dim_src_width_ instead of this field, leaving
  // dim_dst_width_ at 0 — verify against the kernel's expectations.
  int dim_dst_width_{0};     // output width
  int stride_height_{0};     // stride_h_ from PoolingParameter
  int stride_width_{0};      // stride_w_ from PoolingParameter
  int dim_kernel_height_{0}; // pooling window height (window_h_)
  int dim_kernel_width_{0};  // pooling window width (window_w_)
  int padding_height_{0};    // pad_u_ (consistent with tf, see SetParameters)
  int padding_width_{0};     // pad_l_ (consistent with tf, see SetParameters)
  int act_min_{0};           // quantized activation lower bound
  int act_max_{0};           // quantized activation upper bound
  int ch_src_{0};            // input channel count
  int32_t *buffer_{nullptr}; // scratch buffer; size in buffer_size_
  size_t buffer_size_{0};
  PoolingParameter *pooling_parameter_{nullptr};
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_POOLING_INT8_CODER_H_ | |||||
| @@ -0,0 +1,51 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/reshape_int8_coder.h" | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| using mindspore::schema::PrimitiveType_Reshape; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
| int ReshapeInt8Coder::DoCode(CoderContext *const context) { | |||||
| int elements_num = input_tensor_->ElementsNum(); | |||||
| std::vector<QuantArg> input_quant_args = input_tensor_->quant_params(); | |||||
| std::vector<QuantArg> output_quant_args = output_tensor_->quant_params(); | |||||
| MS_CHECK_TRUE(!input_quant_args.empty(), "input quant_params is empty"); | |||||
| MS_CHECK_TRUE(!output_quant_args.empty(), "output quant_params is empty"); | |||||
| // in Int8Reshape, the following values are checked. then it will do a memory copy | |||||
| // para.in_args_.scale_ == para.out_args_.scale_ && para.in_args_.zp_ == para.out_args_.zp_ | |||||
| MS_CHECK_TRUE((input_quant_args.at(0).scale == output_quant_args.at(0).scale && | |||||
| input_quant_args.at(0).zeroPoint == output_quant_args.at(0).zeroPoint), | |||||
| "the quant arg of input and output should be the same!"); | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| code.CodeFunction("memcpy", output_tensor_, input_tensor_, elements_num); | |||||
| MS_LOG(INFO) << "ReshapeInt8Coder has been called"; | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Reshape, CPUOpCoderCreator<ReshapeInt8Coder>) | |||||
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,36 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_RESHAPE_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_RESHAPE_INT8_CODER_H_ | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/op_coder.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
| class ReshapeInt8Coder : public OperatorCoder { | |||||
| public: | |||||
| ReshapeInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const Model::Node *node, size_t node_index, Target target) | |||||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||||
| ~ReshapeInt8Coder() override = default; | |||||
| int Prepare(CoderContext *const context) override { return RET_OK; } | |||||
| int DoCode(CoderContext *const context) override; | |||||
| }; | |||||
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_RESHAPE_INT8_CODER_H_ | |||||
| @@ -0,0 +1,81 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "micro/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.h" | |||||
| #include <limits> | |||||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||||
| #include "micro/coder/opcoders/file_collector.h" | |||||
| using mindspore::schema::PrimitiveType_SoftMax; | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
  // NOTE(review): the return value of Init() is discarded here — confirm it
  // cannot fail for this coder.
  SoftmaxBaseCoder::Init();
  MS_CHECK_TRUE(!input_tensor_->quant_params().empty(), "input quant_params is empty");
  QuantArg in_quant_arg = input_tensor_->quant_params().at(0);
  // The kernel subtracts the input zero point, hence the negation.
  quant_params_.in_quant_args_.zp_ = -in_quant_arg.zeroPoint;
  std::vector<QuantArg> out_quant_args = output_tensor_->quant_params();
  MS_CHECK_TRUE(!out_quant_args.empty(), "output quant_params is empty");
  quant_params_.out_quant_arg_.scale_ = static_cast<float>(out_quant_args.at(0).scale);
  quant_params_.out_quant_arg_.zp_ = out_quant_args.at(0).zeroPoint;
  // int8 output saturates to the full signed 8-bit range.
  quant_params_.output_activation_min_ = std::numeric_limits<int8_t>::min();
  quant_params_.output_activation_max_ = std::numeric_limits<int8_t>::max();
  // Convert the input scale into a Q-format fixed-point representation:
  // total_signed_bits fractional bits, of which input_integer_bits are
  // reserved for the integer part; the multiplier is clamped to INT32_MAX.
  const int total_signed_bits = 31;
  const int input_integer_bits = 5;
  const double input_real_multiplier =
      MSMIN(in_quant_arg.scale * (1 << (unsigned int)(total_signed_bits - input_integer_bits)),
            (1ll << total_signed_bits) - 1.0);
  // mult, shift
  QuantizeMultiplier(input_real_multiplier, &mult_, &shift_);
  // Calculate Input Radius
  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                    (1ll << static_cast<size_t>((total_signed_bits - input_integer_bits))) /
                                    (1ll << static_cast<size_t>(shift_));
  diff_min_ = -1.0 * static_cast<int>(std::floor(max_input_rescaled));
  // Softmax runs along the innermost dimension; every outer dimension is
  // folded into num_rows_, the innermost one becomes row_size_.
  const int trailing_dim = static_cast<int>(input_tensor_->shape().size()) - 1;
  const int dims_count = input_tensor_->shape().size();
  MS_CHECK_TRUE(0 <= trailing_dim && trailing_dim < dims_count, "trailing_dim should be in [0, dims_count)");
  num_rows_ = 1;
  for (int i = 0; i < dims_count; ++i) {
    num_rows_ *= (i == trailing_dim) ? 1 : input_tensor_->DimensionSize(i);
  }
  MS_CHECK_TRUE(input_tensor_->DimensionSize(trailing_dim) == output_tensor_->DimensionSize(trailing_dim),
                "input and output DimensionSize mismatch");
  row_size_ = input_tensor_->DimensionSize(trailing_dim);
  // NOTE(review): ReSize()'s return value is also discarded — verify failure
  // is impossible here.
  ReSize();
  return RET_OK;
}
| int SoftMaxInt8Coder::DoCode(CoderContext *const context) { | |||||
| Serializer code; | |||||
| code.precision(kPrecision); | |||||
| Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_softmax_s8.c"}); | |||||
| code.CodeFunction("arm_softmax_s8", input_tensor_, num_rows_, row_size_, mult_, shift_, diff_min_, output_tensor_); | |||||
| MS_LOG(INFO) << "SoftMaxInt8Coder has been called"; | |||||
| context->AppendCode(code.str()); | |||||
| return RET_OK; | |||||
| } | |||||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxInt8Coder>) | |||||
| } // namespace mindspore::lite::micro::cmsis | |||||
| @@ -0,0 +1,49 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_SOFTMAX_INT8_CODER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_SOFTMAX_INT8_CODER_H_ | |||||
| #include <string> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "micro/coder/opcoders/base/softmax_base_coder.h" | |||||
| namespace mindspore::lite::micro::cmsis { | |||||
// Generates CMSIS-NN softmax code (arm_softmax_s8) for int8 tensors on
// Cortex-M (kARM32M) targets.
class SoftMaxInt8Coder final : public SoftmaxBaseCoder {
 public:
  SoftMaxInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                   const Model::Node *node, size_t node_index, Target target)
      : SoftmaxBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~SoftMaxInt8Coder() override = default;

  // Derives the fixed-point multiplier/shift pair and the row geometry from
  // the tensors' quantization parameters and shapes.
  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int32_t num_rows_{0};  // product of all dimensions except the innermost
  int32_t row_size_{0};  // size of the innermost (softmax) dimension
  int32_t mult_{0};      // fixed-point multiplier derived from the input scale
  int32_t shift_{0};     // shift paired with mult_
  int32_t diff_min_{0};  // input radius passed to arm_softmax_s8
};
| } // namespace mindspore::lite::micro::cmsis | |||||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_CMSIS_NN_SOFTMAX_INT8_CODER_H_ | |||||
| @@ -0,0 +1,233 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_SERIALIZERS_SERIALIZER_H_ | |||||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_SERIALIZERS_SERIALIZER_H_ | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include <sstream> | |||||
| #include "micro/coder/utils/print_utils.h" | |||||
| #include "micro/coder/allocator/allocator.h" | |||||
| namespace mindspore::lite::micro { | |||||
| /* | |||||
| * convert array T[] to string | |||||
| * std::ostream &operator<<(std::ostream &, const ::T &) must exist | |||||
| * arr shouldn't be pointer, T* is not valid | |||||
| * example: | |||||
| * int arr[] = {1, 2, 3}; | |||||
| * ToString(arr); | |||||
| * the code above would produce: | |||||
| * "{1, 2, 3}" | |||||
| */ | |||||
/*
 * Renders a fixed-size array as a C initializer-list string, e.g. "{1, 2, 3}".
 * Trailing zero-valued elements are omitted to keep the generated code short.
 * std::ostream &operator<<(std::ostream &, const ::T &) must exist.
 * arr must be a real array — a T* pointer does not match this template.
 */
template <typename T, unsigned int N>
std::string ToString(const T (&arr)[N]) {
  unsigned int count = N;
  while (count > 0 && arr[count - 1] == 0) {
    --count;
  }
  std::stringstream out;
  out << "{";
  const char *sep = "";
  for (unsigned int i = 0; i < count; ++i) {
    out << sep << arr[i];
    sep = ", ";
  }
  out << "}";
  return out.str();
}
/*
 * Accumulates generated C source text into an internal stream. Operator
 * coders use it to emit function calls, array definitions and struct
 * initializers; the final text is fetched with str() and handed to
 * CoderContext::AppendCode.
 */
class Serializer {
 public:
  Serializer() = default;
  virtual ~Serializer() = default;

  /*
   * Code function call to generated code
   * First parameter is the function name, the rest are the parameters of the function
   * example:
   *   CodeFunction("function", "foo", "bar", "foobar", 42);
   * the code above would produce:
   *   "function(foo, bar, foobar, 42);\n"
   * note: string parameters are emitted verbatim, without surrounding quotes
   * (see GenCode(const char *)).
   */
  template <typename... PARAMETERS>
  void CodeFunction(const std::string &name, PARAMETERS... parameters) {
    code << name << "(";
    GenCode(parameters...);
    code << ");\n";
  }

  /*
   * Code function call to generated code, with checking the return code
   * First parameter is the function name, the rest are the parameters of the function
   * example:
   *   CodeFunctionWithCheck("function", "foo", "bar", "foobar", 42);
   * the code above would produce:
   *   """
   *   if(function(foo, bar, foobar, 42) != RET_OK) {
   *    return RET_ERROR;
   *   }
   *   """
   */
  template <typename... PARAMETERS>
  void CodeFunctionWithCheck(const std::string &name, PARAMETERS... parameters) {
    code << "if(" << name << "(";
    GenCode(parameters...);
    code << ") != RET_OK) {\n";
    code << " return RET_ERROR;\n";
    code << "}\n";
  }

  /*
   * helper function for coding an array definition
   * example:
   *   int bar[] = {1, 3, 2};
   *   CodeArray("bar", bar, 3);
   * the code above would produce:
   *   "const int bar[3] = {1, 3, 2};\n"
   * with is_const = false the array is declared "static" instead of "const"
   * so the generated code may modify it in place.
   */
  template <typename T>
  void CodeArray(const std::string &name, T *data, int length, bool is_const = true) {
    std::string type = GetVariableTypeName<T>();
    if (is_const) {
      code << "const " << type << " " << name << "[" << length << "] = {";
    } else {
      code << "static " << type << " " << name << "[" << length << "] = {";
    }
    for (int i = 0; i < length - 1; ++i) {
      code << data[i] << ", ";
    }
    if (length > 0) {
      code << data[length - 1];
    }
    code << "};\n";
  }

  /*
   * Codes "t = malloc(size);" followed by the mandatory NULL check that
   * returns RET_ERROR from the generated function on allocation failure.
   */
  template <typename T>
  void CodeMallocExpression(T t, size_t size) {
    GenCode(t);
    code << " = malloc(" << size << ");\n";
    code << "if (";
    GenCode(t);
    code << " == NULL) {\n";
    code << " return RET_ERROR;\n";
    code << "}\n";
  }

  // Sets the floating-point precision of the underlying stream and returns
  // the previous setting (mirrors std::ios_base::precision).
  std::streamsize precision(std::streamsize size) {
    std::streamsize old = code.precision(size);
    return old;
  }

  // Returns everything serialized so far.
  std::string str() const { return code.str(); }

  // Raw stream-style insertion for tokens that need no special handling.
  template <typename T>
  Serializer &operator<<(T t) {
    code << t;
    return *this;
  }

  /*
   * helper function for CodeStruct
   * example:
   *   given:
   *     typedef struct Foo {
   *       int array[5];
   *       int *pointer;
   *       int count;
   *     } Foo;
   *     int pointer[] = {1, 3, 2, 42};
   *     Foo foo = {{1, 2, 3}, pointer, 4};
   *   the CodeStruct should be written as:
   *     CodeStruct(const string &name, const Foo &foo) {
   *       CodeArray("pointer_gen", foo.pointer, foo.count);
   *       CodeBaseStruct("Foo", "foo_gen", ToString(foo.array), "pointer_gen", foo.count);
   *     }
   *   the code above would produce:
   *     "const int pointer_gen[4] = {1, 3, 2, 42};\n
   *      const Foo foo_gen = {{1, 2, 3}, pointer_gen, 4};\n"
   */
  template <typename... PARAMETERS>
  void CodeBaseStruct(const std::string &type, const std::string &name, PARAMETERS... parameters) {
    code << "const " << type << " " << name << " = {";
    GenCode(parameters...);
    code << "};\n";
  }

 protected:
  std::ostringstream code;

 private:
  /*
   * function GenCode(Args... args)
   * Convert all parameters to string, and join connect them with comma ", "
   * example:
   *   GenCode(true, false, static_cast<int8_t>(12), static_cast<uint8_t>(57), 'c', 5567);
   * the code above would produce:
   *   "true, false, 12, 57, c, 5567"
   */
  template <typename T, typename... REST>
  void GenCode(T t, REST... args) {
    GenCode(t);
    code << ", ";
    GenCode(args...);
  }

  // Base case: stream a single value through operator<<.
  template <typename T>
  void GenCode(T t) {
    code << t;
  }

  /*
   * Convert a pointer to the symbolic name registered for it in
   * MemoryAllocator (and it should be registered).
   *
   * A non-null pointer unknown to the allocator is a coder bug: emitting the
   * raw host address would bake something like
   *   {foo, 0x7ffed0cd377c, bar}
   * into the generated code, carrying a meaningless hard-coded address to the
   * runtime and making it harder to debug — so the process logs an error and
   * terminates instead of generating such code.
   *
   * If t is nullptr, "NULL" is coded to the generated output, because some
   * pointers are legitimately null in some cases and we want to code them.
   * In this function, passing nullptr **would not** be regarded as a bug or
   * mistake.
   */
  template <typename T>
  void GenCode(T *t) {
    if (t == nullptr) {
      code << "NULL";
    } else {
      std::string name = MemoryAllocator::GetInstance()->GetRuntimeAddr(t);
      if (name.empty()) {
        MS_LOG(ERROR) << "pointer is not allocated by the allocator";
        exit(1);
      }
      code << name;
    }
  }

  // std::boolalpha converts bool to string literals {"true", "false"} instead of {1, 0}
  void GenCode(bool t) { code << std::boolalpha << t; }
  // int8_t/uint8_t would otherwise stream as raw characters; force digits.
  void GenCode(int8_t t) { code << std::to_string(t); }
  void GenCode(uint8_t t) { code << std::to_string(t); }
  void GenCode(decltype(nullptr) t) { code << "NULL"; }
  // C-string literals are emitted verbatim (identifiers, not quoted strings).
  void GenCode(const char *t) { code << t; }
};
| } // namespace mindspore::lite::micro | |||||
#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_SERIALIZERS_SERIALIZER_H_