From: @ehaleva
Reviewed-by:
Signed-off-by:
pull/12773/head
@@ -49,6 +49,7 @@ set(LITE_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/common/loader_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/common/quant_utils.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c
@@ -124,6 +125,7 @@ if(SUPPORT_TRAIN)
        ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
        )
    if(ENABLE_V0)
        set(LITE_SRC
@@ -192,7 +194,10 @@ if(BUILD_MINDDATA STREQUAL "lite")
    target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif()
if(SUPPORT_TRAIN)
    add_dependencies(mindspore-lite fbs_inner_src)
    add_dependencies(mindspore-lite_static fbs_inner_src)
    target_link_libraries(mindspore-lite minddata-lite)
    target_link_libraries(mindspore-lite_static minddata-lite)
endif()
@@ -0,0 +1,104 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "schema/inner/model_generated.h"
#include "src/common/quant_utils.h"
#include "src/lite_kernel.h"

namespace mindspore {
namespace lite {
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
                         bool channel_at_first, float *desired_max, float *desired_min) {
  float min = FLT_MAX;
  float max = -FLT_MAX;
  // find min and max
  for (int j = 0; j < one_filter_size; j++) {
    auto index = j + i * one_filter_size;
    if (!channel_at_first) {
      index = j * channels + i;
    }
    if (index >= elem_count) {
      MS_LOG(ERROR) << "index overflow!";
      continue;  // skip out-of-range access
    }
    min = std::min(min, raw_datas[index]);
    max = std::max(max, raw_datas[index]);
  }
  *desired_max = max;
  *desired_min = min;
}
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
                             int quant_min, int num_bits) {
  MS_ASSERT(quantParam != nullptr);
  if (mMin > 0.0f) {
    MS_LOG(DEBUG) << "min " << mMin << " is bigger than 0, set to 0; this may cause low precision";
    mMin = 0.0f;
  }
  if (mMax < 0.0f) {
    MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0; this may cause low precision";
    mMax = 0.0f;
  }
  if (mMin > mMax) {
    MS_LOG(ERROR) << "cal error: min " << mMin << " > max " << mMax;
    return RET_PARAM_INVALID;
  }
  if (mMin == mMax) {
    if (mMin != 0.0f) {
      MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
      return RET_ERROR;
    }
    quantParam->inited = true;
    quantParam->min = mMin;
    quantParam->max = mMax;
    quantParam->scale = 0.0f;
    quantParam->zeroPoint = 0;
    quantParam->narrowRange = narrowRange;
    quantParam->numBits = num_bits;
    return RET_OK;
  }
  auto quantMinFloat = static_cast<double>(quant_min);
  auto quantMaxFloat = static_cast<double>(quant_max);
  if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) {
    MS_LOG(ERROR) << "divisor cannot be 0";
    return RET_ERROR;
  }
  double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
  if (fabs(scale) <= 0.0f) {
    MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
    return RET_ERROR;
  }
  const double zeroPointFromMin = quantMinFloat - mMin / scale;
  int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin));
  // The zero point should always be in the range of quantized value,
  // [qmin, qmax].
  MS_ASSERT(zeroPoint >= quant_min);
  MS_ASSERT(zeroPoint <= quant_max);
  quantParam->inited = true;
  quantParam->min = mMin;
  quantParam->max = mMax;
  quantParam->scale = scale;
  quantParam->zeroPoint = zeroPoint;
  quantParam->narrowRange = narrowRange;
  quantParam->numBits = num_bits;
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
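For reference, CalQuantizationParams is the standard asymmetric (affine) quantization derivation: scale = (max - min) / (qmax - qmin) and zeroPoint = round(qmin - min / scale). A minimal standalone sketch with assumed example values (plain C++, not the schema API):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Standalone sketch of the scale/zero-point math in CalQuantizationParams.
// The float range [-2.5, 6.0] is an assumed example, not data from the PR.
int main() {
  double min = -2.5, max = 6.0;                 // observed float range
  const int quant_min = -128, quant_max = 127;  // int8, narrowRange = false
  double scale = (max - min) / (quant_max - quant_min);
  int zero_point = static_cast<int>(std::round(quant_min - min / scale));
  // Round-trip one value to show what the parameters mean.
  double x = 1.0;
  int q = std::max(quant_min,
                   std::min(quant_max, static_cast<int>(std::round(x / scale + zero_point))));
  double x_hat = scale * (q - zero_point);  // dequantized value, ~1.0
  std::printf("scale=%g zeroPoint=%d q=%d x_hat=%g\n", scale, zero_point, q, x_hat);
  return 0;
}
```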
@@ -0,0 +1,234 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
#define MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_

#include <float.h>
#include <cmath>
#include <climits>
#include <limits>
#include <algorithm>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "ir/dtype/type_id.h"

namespace mindspore {
namespace schema {
struct QuantParamT;
}
namespace lite {
const int RET_QUANT_CONTINUE = 2;
static constexpr double SCALE_THREASHOLD = 1e-38;
static constexpr int kPerTensor = 1;

inline int QuantMax(int bits, TypeId type) {
  if (type == kNumberTypeInt8) {
    return (1 << (bits - 1)) - 1;
  } else if (type == kNumberTypeUInt8) {
    return (1 << bits) - 1;
  }
  return 0;
}

inline int QuantMin(int bits, TypeId type) {
  if (type == kNumberTypeInt8) {
    return -(1 << (bits - 1));
  }
  return 0;
}
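As a quick sanity check of what QuantMax/QuantMin return (a standalone sketch; the TypeId dispatch is split into separate helpers here):

```cpp
// Sketch: the ranges QuantMax/QuantMin produce for the common cases.
constexpr int Int8QuantMax(int bits) { return (1 << (bits - 1)) - 1; }
constexpr int Int8QuantMin(int bits) { return -(1 << (bits - 1)); }
constexpr int UInt8QuantMax(int bits) { return (1 << bits) - 1; }

static_assert(Int8QuantMax(8) == 127 && Int8QuantMin(8) == -128, "8-bit signed");
static_assert(UInt8QuantMax(8) == 255, "8-bit unsigned (min is 0)");
static_assert(Int8QuantMax(7) == 63 && Int8QuantMin(7) == -64, "7-bit signed");
```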
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
                         bool channel_at_first, float *desired_max, float *desired_min);

STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
                             int quant_min, int num_bits);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
  MS_ASSERT(quantParam != nullptr);
  MS_ASSERT(quantParam->inited);
  const auto scale = quantParam->scale;
  const auto zeroPoint = quantParam->zeroPoint;
  const auto numBit = quantParam->numBits;
  const auto narrowRange = quantParam->narrowRange;
  double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
  const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale;
  double minLimit;
  if (narrowRange) {
    minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale;
  } else {
    minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale;
  }
  return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
    double tmp;
    if (originData > maxLimit) {
      tmp = maxLimit;
    } else if (originData < minLimit) {
      tmp = minLimit;
    } else {
      tmp = originData;
    }
    auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
    return quantData;
  }();
}
template <typename T>
T QuantizeData(float originData, const schema::QuantParamT *quantParam, int quant_max, int quant_min) {
  MS_ASSERT(quantParam != nullptr);
  MS_ASSERT(quantParam->inited);
  const auto scale = quantParam->scale;
  const int zeroPoint = quantParam->zeroPoint;
  const int maxLimit = quant_max;
  const int minLimit = quant_min;
  if (scale <= SCALE_THREASHOLD) {
    return 0;
  }
  return [maxLimit, minLimit, zeroPoint, scale, originData] {
    auto quant_data = std::round(originData / scale + zeroPoint);
    if (quant_data > maxLimit) {
      quant_data = maxLimit;
    } else if (quant_data < minLimit) {
      quant_data = minLimit;
    }
    return static_cast<T>(quant_data);
  }();
}
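The second overload is the one the new export path uses: round(x / scale + zeroPoint), clamped to [quant_min, quant_max]. A self-contained restatement with assumed parameters (schema::QuantParamT replaced by plain values):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch of the clamped-round behavior of the second QuantizeData overload.
int8_t QuantizeSketch(float x, double scale, int zp, int qmin, int qmax) {
  double q = std::round(x / scale + zp);
  q = std::max(static_cast<double>(qmin), std::min(static_cast<double>(qmax), q));
  return static_cast<int8_t>(q);
}

int main() {
  const double scale = 8.5 / 255.0;  // derived earlier for the assumed range [-2.5, 6.0]
  const int zp = -53;
  std::printf("%d %d %d\n",
              QuantizeSketch(0.0f, scale, zp, -128, 127),     // -> -53 (the zero point)
              QuantizeSketch(6.0f, scale, zp, -128, 127),     // -> 127 (top of range)
              QuantizeSketch(-10.0f, scale, zp, -128, 127));  // -> -128 (clamped)
  return 0;
}
```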
template <typename T>
STATUS DoPerLayerQuant(const float *raw_datas, size_t elem_count, std::vector<schema::QuantParamT> *quant_params,
                       const int &quant_max, const int &quant_min, const size_t &bit_num, const bool &k_means,
                       std::vector<T> *quant_datas) {
  float min = FLT_MAX;
  float max = -FLT_MAX;
  for (uint32_t i = 0; i < elem_count; i++) {
    min = std::min(min, raw_datas[i]);
    max = std::max(max, raw_datas[i]);
  }
  schema::QuantParamT quant_param;
  if (!k_means) {
    STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "CalQuantizationParams failed: " << status;
      return status;
    }
  }
  quant_params->emplace_back(quant_param);
  // update data and datatype
  for (uint32_t i = 0; i < elem_count; i++) {
    float raw_data = raw_datas[i];
    if (!k_means) {
      auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
      (*quant_datas)[i] = quant_data;
    }
  }
  return RET_OK;
}
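A hedged sketch of the calling pattern for DoPerLayerQuant (the data values are made up; the real caller is TrainExport::QuantTensorData below, and model_generated.h must be included before quant_utils.h because the header only forward-declares QuantParamT):

```cpp
#include <cstdint>
#include <vector>
#include "schema/inner/model_generated.h"  // full QuantParamT definition
#include "src/common/quant_utils.h"

// Sketch only: quantize a whole tensor with a single scale/zero-point pair.
mindspore::lite::STATUS PerLayerExample() {
  std::vector<float> raw = {-2.5f, 0.0f, 1.0f, 6.0f};  // assumed weights
  std::vector<int8_t> quant(raw.size());
  std::vector<mindspore::schema::QuantParamT> params;
  // On RET_OK: params holds one QuantParamT, quant holds the int8 payload.
  return mindspore::lite::DoPerLayerQuant<int8_t>(raw.data(), raw.size(), &params,
                                                  /*quant_max=*/127, /*quant_min=*/-128,
                                                  /*bit_num=*/8, /*k_means=*/false, &quant);
}
```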
template <typename T>
STATUS DoPerChannelQuant(const float *raw_datas, size_t elem_count, const schema::QuantType &quant_type,
                         std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
                         const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, int channels,
                         bool channel_at_first = true) {
  static const int quant_param_size = 32 * 8;
  std::vector<float> dequant_datas(quant_datas->size());
  if (channels <= 0) {
    MS_LOG(ERROR) << "channels must be greater than 0";
    return RET_ERROR;
  }
  size_t one_filter_size = elem_count / channels;
  bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size;
  if (!do_quant && quant_type == schema::QuantType_WeightQuant) {
    MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size;
    return RET_QUANT_CONTINUE;
  }
  for (int i = 0; i < channels; i++) {
    float min = FLT_MAX;
    float max = -FLT_MAX;
    GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min);
    schema::QuantParamT quant_param;
    STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "CalQuantizationParams failed: " << status;
      return status;
    }
    // do quantization
    double average_dequant = 0;
    double average_raw = 0;
    for (uint32_t j = 0; j < one_filter_size; j++) {
      auto index = j + i * one_filter_size;
      if (!channel_at_first) {
        index = j * channels + i;
      }
      MS_ASSERT(index < elem_count);
      float raw_data = raw_datas[index];
      auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
      (*quant_datas)[index] = quant_data;
      if (quant_type == schema::QuantType_WeightQuant) {
        float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint);
        dequant_datas[index] = dequant_data;
        average_dequant += dequant_data;
        average_raw += raw_data;
      }
    }
    if (quant_type == schema::QuantType_WeightQuant && !k_means) {
      // mean
      average_dequant = average_dequant / one_filter_size;
      average_raw = average_raw / one_filter_size;
      // std
      double variance_dequant = 0;
      double variance_raw = 0;
      for (uint32_t j = 0; j < one_filter_size; j++) {
        auto index = j + i * one_filter_size;
        if (!channel_at_first) {
          index = j * channels + i;
        }
        MS_ASSERT(index < elem_count);
        variance_dequant += std::pow(dequant_datas[index] - average_dequant, 2);
        variance_raw += std::pow(raw_datas[index] - average_raw, 2);
      }
      variance_dequant = std::sqrt(variance_dequant / one_filter_size);
      variance_raw = std::sqrt(variance_raw / one_filter_size);
      quant_param.varCorr = 1;
      if (variance_raw != 0 && variance_dequant != 0) {
        auto temp_var_corr = variance_raw / variance_dequant;
        if (temp_var_corr > 0 && temp_var_corr < 10) {
          quant_param.varCorr = temp_var_corr;
        } else {
          MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr;
        }
      }
      quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr;
    }
    quant_params->emplace_back(quant_param);
  }
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore

#endif  // MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
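The per-channel varCorr/meanCorr statistics above amount to a linear bias correction: after dequantization, values are rescaled so their mean and standard deviation match the original weights. A standalone numeric illustration with assumed toy data (one channel, not the real API):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the varCorr/meanCorr computation in DoPerChannelQuant.
int main() {
  std::vector<double> raw = {0.1, 0.5, 0.9};  // assumed original weights
  std::vector<double> deq = {0.0, 0.5, 1.0};  // after quantize -> dequantize
  double mean_raw = 0, mean_deq = 0;
  for (size_t i = 0; i < raw.size(); ++i) {
    mean_raw += raw[i];
    mean_deq += deq[i];
  }
  mean_raw /= raw.size();
  mean_deq /= deq.size();
  double var_raw = 0, var_deq = 0;
  for (size_t i = 0; i < raw.size(); ++i) {
    var_raw += (raw[i] - mean_raw) * (raw[i] - mean_raw);
    var_deq += (deq[i] - mean_deq) * (deq[i] - mean_deq);
  }
  double std_raw = std::sqrt(var_raw / raw.size());
  double std_deq = std::sqrt(var_deq / deq.size());
  double var_corr = (std_deq != 0) ? std_raw / std_deq : 1.0;  // accepted only in (0, 10)
  double mean_corr = mean_raw - mean_deq * var_corr;
  // A consumer would then apply: corrected = scale * (q - zp) * var_corr + mean_corr.
  std::printf("varCorr=%g meanCorr=%g\n", var_corr, mean_corr);  // 0.8, 0.1
  return 0;
}
```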
@@ -23,46 +23,93 @@
#include <set>
#include "schema/inner/model_generated.h"
#include "src/train/train_utils.h"
#include "src/common/quant_utils.h"
#include "tools/common/storage.h"

namespace mindspore {
namespace lite {
std::vector<uint8_t> TrainExport::CreateData(const mindspore::lite::Tensor *tensor) {
std::vector<uint8_t> TrainExport::CreateData(const lite::Tensor *tensor) {
  uint8_t *tensor_data = reinterpret_cast<uint8_t *>(tensor->data_c());
  auto size = tensor->Size();
  std::vector<uint8_t> data(tensor_data, tensor_data + size);
  return data;
}

bool TrainExport::NeedQuantization(const lite::Tensor *tensor) {
  return (tensor->quant_params().size() > 0 && tensor->quant_params().at(0).inited);
}

schema::QuantType TrainExport::GetNodeQuantType(const kernel::LiteKernel *kernel) {
  if (std::any_of(kernel->in_tensors().cbegin(), kernel->in_tensors().cend(), [](const lite::Tensor *t) {
        return (t->IsConst() && (t->quant_params().size() > 0) && (t->quant_params().at(0).inited));
      })) {
    return schema::QuantType_QUANT_WEIGHT;
  }
  return schema::QuantType_QUANT_NONE;
}
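The shape of the GetNodeQuantType check, restated on simplified stand-in types (a sketch, not the real Tensor/LiteKernel API): a node is tagged QUANT_WEIGHT as soon as any constant input tensor already carries an initialized quantization parameter.

```cpp
#include <algorithm>
#include <vector>

struct StubTensor {  // hypothetical stand-in for lite::Tensor
  bool is_const;
  bool quant_param_inited;
};

bool IsWeightQuantNode(const std::vector<StubTensor> &inputs) {
  return std::any_of(inputs.cbegin(), inputs.cend(), [](const StubTensor &t) {
    return t.is_const && t.quant_param_inited;
  });
}
```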
int TrainExport::QuantTensorData(schema::TensorT *dest_tensor, const lite::Tensor *src_tensor) {
  int channels = src_tensor->quant_params().size();
  if (channels < 1) {
    MS_LOG(ERROR) << "Quant Params is empty";
    return RET_ERROR;
  }
  int bit_num = src_tensor->quant_params().at(0).bitNum;
  int quant_max = QuantMax(bit_num, kNumberTypeInt8);
  int quant_min = QuantMin(bit_num, kNumberTypeInt8);
  std::vector<int8_t> data(src_tensor->ElementsNum());
  std::vector<schema::QuantParamT> quant_params;
  STATUS ret = RET_OK;
  if (channels == kPerTensor) {
    ret = DoPerLayerQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
                                  &(quant_params), quant_max, quant_min, bit_num, false, &data);
  } else {
    bool channel_at_first = (src_tensor->shape().at(0) == channels);
    ret = DoPerChannelQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
                                    schema::QuantType_WeightQuant, &(quant_params), quant_max, quant_min, bit_num,
                                    false, &data, channels, channel_at_first);
  }
  if (ret == RET_QUANT_CONTINUE) {
    MS_LOG(DEBUG) << "No Need to quant per channel";
    return RET_OK;
  }
  if (ret == RET_ERROR) {
    MS_LOG(ERROR) << "QuantTensorData error, channels = " << channels;
    return ret;
  }
  if (quant_params.empty()) {
    MS_LOG(ERROR) << "quant_params empty";
    return RET_ERROR;
  }
  dest_tensor->data = std::vector<uint8_t>(data.data(), data.data() + data.size());
  dest_tensor->dataType = kNumberTypeInt8;
  dest_tensor->quantParams.clear();
  for (auto quant_param : quant_params) {
    dest_tensor->quantParams.emplace_back(std::make_unique<schema::QuantParamT>(quant_param));
  }
  return RET_OK;
}
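QuantTensorData picks per-tensor vs. per-channel mode from the number of quant params attached to the source tensor. A small sketch of that decision with an assumed conv-weight shape:

```cpp
#include <cstdio>
#include <vector>

// Sketch of the mode selection in QuantTensorData (assumed shapes; the real
// code reads src_tensor->quant_params().size() and src_tensor->shape()).
int main() {
  const int num_quant_params = 16;               // one param per output channel
  const std::vector<int> shape = {16, 3, 3, 3};  // conv weight, CO dimension first
  bool per_tensor = (num_quant_params == 1);     // the kPerTensor case
  bool channel_at_first = (shape.at(0) == num_quant_params);
  std::printf("per_tensor=%d channel_at_first=%d\n", per_tensor, channel_at_first);
  return 0;
}
```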
std::unique_ptr<schema::TensorT> TrainExport::CreateTensor(const mindspore::lite::Tensor *tensor,
                                                           schema::Tensor *scTensor) {
  auto tensorT = std::make_unique<schema::TensorT>();
  tensorT->nodeType = scTensor->nodeType();
  tensorT->dataType = tensor->data_type();
  tensorT->dims = tensor->shape();
  tensorT->format = tensor->format();
  tensorT->name = tensor->tensor_name();
  tensorT->refCount = 0;
  tensorT->offset = 0;
  tensorT->dataType = tensor->data_type();
  tensorT->enableHuffmanCode = false;
  if ((tensorT->nodeType == NodeType_ValueNode) && (scTensor->data() != nullptr) && (scTensor->data()->size() > 0)) {
    tensorT->data = CreateData(tensor);
  }
  for (auto quant_param : tensor->quant_params()) {
    auto quantParamT = std::make_unique<schema::QuantParamT>();
    quantParamT->scale = quant_param.scale;
    quantParamT->zeroPoint = quant_param.zeroPoint;
    quantParamT->min = 0;
    quantParamT->max = 0;
    quantParamT->narrowRange = true;
    quantParamT->numBits = quant_param.bitNum;
    quantParamT->inited = quant_param.inited;
    quantParamT->varCorr = quant_param.var_corr;
    quantParamT->meanCorr = quant_param.mean_corr;
    quantParamT->dstDtype = quant_param.dstDtype;
    quantParamT->roundType = quant_param.roundType;
    quantParamT->multiplier = quant_param.multiplier;
    tensorT->quantParams.emplace_back(std::move(quantParamT));
    if (NeedQuantization(tensor)) {
      QuantTensorData(tensorT.get(), tensor);
    } else {
      tensorT->data = CreateData(tensor);
    }
  }
  tensorT->quantClusters = tensor->quant_clusters();
  return tensorT;
@@ -85,7 +132,7 @@ std::unique_ptr<schema::CNodeT> TrainExport::CreateCNode(const mindspore::kernel
  cnodeT->inputIndex = inputIndex;
  cnodeT->outputIndex = outputIndex;
  cnodeT->name = kernel->name();
  cnodeT->quantType = schema::QuantType_QUANT_NONE;
  cnodeT->quantType = GetNodeQuantType(kernel);
  // find kernel in model
  auto *node = FindNode(kernel);
  if (node == nullptr) {
@@ -132,7 +179,6 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
      MS_LOG(ERROR) << "cannot find tensor " + tensor->ToString() + " in model";
      return RET_ERROR;
    }
    out_set.insert(id);
    auto it = remap.find(id);
    if (it == remap.end()) {
      remap[id] = tensor_idx;
@@ -153,7 +199,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
    schema::Tensor *scTensor = model_->all_tensors_.at(id);
    auto tensorT = CreateTensor(tensor, scTensor);
    // find a tensor which is not an output
    if (out_set.find(id) == out_set.end()) {
    if (out_set.find(remap[id]) == out_set.end()) {
      if ((tensorT->nodeType == NodeType_ValueNode) && (tensorT->data.size() == 0)) {
        meta_graph->inputIndex.push_back(remap[id]);
      }
@@ -165,7 +211,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
    meta_graph->allTensors.emplace_back(std::move(tensorT));
  }
  auto graph = meta_graph.release();
  int err = SaveToFile(graph, file_name_);
  int err = Storage::Save(*graph, file_name_);
  if (err != RET_OK) {
    MS_LOG(ERROR) << "failed to save flatbuffer file " << file_name_;
  }
@@ -173,30 +219,5 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
  return err;
}
int TrainExport::SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath) {
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = schema::MetaGraph::Pack(builder, graph);
  builder.Finish(offset);
  schema::FinishMetaGraphBuffer(builder, offset);
  int size = builder.GetSize();
  auto content = builder.GetBufferPointer();
  if (content == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer nullptr";
    return RET_ERROR;
  }
  if (access((outputPath + ".ms").c_str(), F_OK) == 0) {
    chmod((outputPath + ".ms").c_str(), S_IWUSR);
  }
  std::ofstream output(outputPath + ".ms", std::ofstream::binary);
  if (!output.is_open()) {
    MS_LOG(ERROR) << "Can not open output file: " << outputPath << ".ms";
    return RET_ERROR;
  }
  output.write((const char *)content, size);
  output.close();
  chmod((outputPath + ".ms").c_str(), S_IRUSR);
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
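The removed SaveToFile duplicated flatbuffer serialization that now lives behind Storage::Save. Its core pattern, in outline (a sketch assuming the generated schema headers; the real implementation also manages file permissions as above):

```cpp
#include <fstream>
#include <string>
#include "flatbuffers/flatbuffers.h"
#include "schema/inner/model_generated.h"

// Sketch of the pack-and-write pattern the deleted SaveToFile performed.
int SaveMetaGraphSketch(const mindspore::schema::MetaGraphT &graph, const std::string &path) {
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = mindspore::schema::MetaGraph::Pack(builder, &graph);
  builder.Finish(offset);
  std::ofstream out(path + ".ms", std::ofstream::binary);
  if (!out.is_open()) {
    return -1;
  }
  out.write(reinterpret_cast<const char *>(builder.GetBufferPointer()), builder.GetSize());
  return 0;
}
```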
@@ -50,7 +50,10 @@ class TrainExport {
  std::unique_ptr<schema::TensorT> CreateTensor(const mindspore::lite::Tensor *tensor, schema::Tensor *scTensor);
  std::unique_ptr<schema::CNodeT> CreateCNode(const mindspore::kernel::LiteKernel *kernel,
                                              std::vector<uint32_t> inputIndex, std::vector<uint32_t> outputIndex);
  int SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath);
  bool NeedQuantization(const mindspore::lite::Tensor *tensor);
  virtual int QuantTensorData(schema::TensorT *dest_tensor, const mindspore::lite::Tensor *src_tensor);
  mindspore::schema::QuantType GetNodeQuantType(const mindspore::kernel::LiteKernel *kernel);
};
}  // namespace lite
}  // namespace mindspore
@@ -32,7 +32,7 @@ std::vector<bool> StringToBitVector(const std::string &str) {
}

STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
  MS_LOG(ERROR) << "un-index weight";
  MS_LOG(DEBUG) << "un-index weight";
  auto bit_num = src_tensor.quantParams()->Get(0)->numBits();
  std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size());
@@ -159,6 +159,7 @@ set(TEST_LITE_SRC
    ${LITE_DIR}/src/common/utils.cc
    ${LITE_DIR}/src/common/loader_util.cc
    ${LITE_DIR}/src/common/string_util.cc
    ${LITE_DIR}/src/common/quant_utils.cc
    ${LITE_DIR}/tools/common/flag_parser.cc
    ${LITE_DIR}/tools/benchmark/benchmark.cc
    ${LITE_DIR}/test/st/benchmark_test.cc
@@ -306,6 +307,7 @@ if(SUPPORT_TRAIN)
        ${LITE_DIR}/src/train/train_utils.cc
        ${LITE_DIR}/src/train/transfer_session.cc
        ${LITE_DIR}/src/lite_session.cc
        ${LITE_DIR}/tools/common/storage.cc
        )
else()
    set(TEST_LITE_SRC
@@ -397,6 +399,10 @@ endif()
add_executable(lite-test ${TEST_SRC})
add_dependencies(lite-test fbs_src)

if(SUPPORT_TRAIN)
    add_dependencies(lite-test fbs_inner_src)
endif()

target_link_libraries(lite-test dl mindspore::gtest)
if(PLATFORM_ARM AND ENABLE_FP16)
@@ -11,7 +11,7 @@ googlenet
densenet
shufflenetv2
mini_alexnet weight_quant 2
nin weight_quant 7
nin weight_quant 9
lenet weight_quant 5
mobilenetv1 weight_quant 2
mobilenetv2 weight_quant 2
@@ -82,22 +82,27 @@ function Run_x86() {
        model_prefix=${line_array[0]}
        model_name=${line_array[0]}'_train'
        accuracy_limit=0.5
        export_file=""
        inference_file=""
        if [[ $model_name == \#* ]]; then
            continue
        fi
        if [[ "${line_array[1]}" == "weight_quant" ]]; then
            model_name=${line_array[0]}'_train_quant'
            accuracy_limit=${line_array[2]}
        else
            export_file="${ms_models_path}/${model_name}_tod"
            rm -f ${export_file}"*"
        fi
        if [[ "${save_lite}" == "1" ]]; then
            inference_file="${ms_models_path}/${model_name}_infer"
        fi
        inference_file="${ms_models_path}/${model_name}_infer"
        rm -f ${inference_file}"*"
        echo ${model_name} >> "${run_x86_log_file}"
        ${run_valgrind}./tools/benchmark_train/benchmark_train \
            --modelFile=${ms_models_path}/${model_name}.ms \
            --inDataFile=${train_io_path}/${model_prefix}_input1.bin,${train_io_path}/${model_prefix}_input2.bin \
            --inDataFile=${train_io_path}/${model_prefix}_input \
            --expectedDataFile=${train_io_path}/${model_prefix}_output --epochs=${epoch_num} --numThreads=${threads} \
            --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} >> "${run_x86_log_file}"
            --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} \
            --exportFile=${export_file} >> "${run_x86_log_file}"
        if [ $? = 0 ]; then
            run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
        else
@@ -168,21 +173,22 @@ function Run_arm() {
        model_prefix=${line_array[0]}
        model_name=${line_array[0]}'_train'
        accuracy_limit=0.5
        export_file=""
        if [[ $model_name == \#* ]]; then
            continue
        fi
        if [[ "${line_array[1]}" == "weight_quant" ]]; then
            model_name=${line_array[0]}'_train_quant'
            accuracy_limit=${line_array[2]}
        else
            export_file="${tmp_dir}/${model_name}_tod"
        fi
        inference_file="${tmp_dir}/${model_name}_infer"
        if [[ "${line_array[1]}" == "noarm32" ]] && [[ "$1" == arm32 ]]; then
            run_result=$1': '${model_name}' irrelevant'; echo ${run_result} >> ${run_benchmark_train_result_file}
            continue
        fi
        if [[ "${save_lite}" == "1" ]]; then
            inference_file="${ms_models_path}/${model_name}_infer"
        fi
        # run benchmark_train test without clib data
        echo ${model_name} >> "${run_arm_log_file}"
        adb -s ${device_id} push ${train_io_path}/${model_prefix}_input*.bin ${train_io_path}/${model_prefix}_output*.bin /data/local/tmp/benchmark_train_test >> ${adb_push_log_file}
@@ -193,15 +199,20 @@
        elif [ "$1" == arm32 ]; then
            echo 'cp /data/local/tmp/arm32/libc++_shared.so ./' >> ${adb_cmd_run_file}
        fi
        echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${run_arm_log_file}
        echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${adb_cmd_run_file}
        adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
        echo "rm -f ${export_file} ${inference_file}.ms" >> ${run_arm_log_file}
        echo "rm -f ${export_file} ${inference_file}.ms" >> ${adb_cmd_run_file}
        adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
        adb_cmd=$(cat <<-ENDM
            export LD_LIBRARY_PATH=./:/data/local/tmp/:/data/local/tmp/benchmark_train_test;./benchmark_train \
            --epochs=${epoch_num} \
            --modelFile=${model_name}.ms \
            --inDataFile=${tmp_dir}/${model_prefix}_input1.bin,${tmp_dir}/${model_prefix}_input2.bin \
            --inDataFile=${tmp_dir}/${model_prefix}_input \
            --expectedDataFile=${tmp_dir}/${model_prefix}_output \
            --numThreads=${threads} --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file}
            --numThreads=${threads} \
            --accuracyThreshold=${accuracy_limit} \
            --inferenceFile=${inference_file} \
            --exportFile=${export_file}
ENDM
        )
        echo "${adb_cmd}" >> ${run_arm_log_file}
@@ -252,7 +263,7 @@ models_mindspore_train_config=${basepath}/models_ms_train.cfg
epoch_num=1
threads=2
train_io_path=""
while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
while getopts "r:M:c:m:d:i:e:vt:q:D" opt; do
    case ${opt} in
        r)
            release_path=${OPTARG}
@@ -295,8 +306,6 @@ while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
            epoch_num=${OPTARG}
            echo "train epoch num is ${epoch_num}"
            ;;
        F) save_lite=1
            ;;
        ?)
            echo "unknown para"
            exit 1;;
@@ -107,7 +107,7 @@ static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_pt
  int bit_num = tensor_input->quantParams.at(0)->numBits;
  // Pack Repetition
  auto repetition_packed = false;
  MS_LOG(ERROR) << dst_node->name;
  MS_LOG(DEBUG) << dst_node->name;
  if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) {
    if (bit_num <= 8) {
      repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input);
@@ -32,8 +32,6 @@
namespace mindspore {
namespace lite {
static const char *DELIM_COLON = ":";
static const char *DELIM_COMMA = ",";
static const char *DELIM_SLASH = "/";
namespace {
@@ -81,8 +79,8 @@ int NetTrain::GenerateRandomData(size_t size, void *data) {
  return RET_OK;
}
int NetTrain::GenerateInputData() {
  for (auto tensor : ms_inputs_) {
int NetTrain::GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
  for (auto tensor : *ms_inputs) {
    MS_ASSERT(tensor != nullptr);
    auto input_data = tensor->MutableData();
    if (input_data == nullptr) {
@@ -100,16 +98,16 @@ int NetTrain::GenerateInputData() {
  return RET_OK;
}

int NetTrain::LoadInput() {
int NetTrain::LoadInput(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
  if (flags_->in_data_file_.empty()) {
    auto status = GenerateInputData();
    auto status = GenerateInputData(ms_inputs);
    if (status != RET_OK) {
      std::cerr << "Generate input data error " << status << std::endl;
      MS_LOG(ERROR) << "Generate input data error " << status;
      return status;
    }
  } else {
    auto status = ReadInputFile();
    auto status = ReadInputFile(ms_inputs);
    if (status != RET_OK) {
      std::cerr << "ReadInputFile error, " << status << std::endl;
      MS_LOG(ERROR) << "ReadInputFile error, " << status;
@@ -119,8 +117,8 @@ int NetTrain::LoadInput() {
  return RET_OK;
}

int NetTrain::ReadInputFile() {
  if (ms_inputs_.empty()) {
int NetTrain::ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
  if (ms_inputs->empty()) {
    return RET_OK;
  }
@@ -128,16 +126,12 @@
    MS_LOG(ERROR) << "Not supported image input";
    return RET_ERROR;
  } else {
    if (ms_inputs_.size() > flags_->input_data_list_.size()) {
      MS_LOG(ERROR) << "missing input files expecting " << ms_inputs_.size() << ", got "
                    << flags_->input_data_list_.size();
      return RET_ERROR;
    }
    for (size_t i = 0; i < ms_inputs_.size(); i++) {
      auto cur_tensor = ms_inputs_.at(i);
    for (size_t i = 0; i < ms_inputs->size(); i++) {
      auto cur_tensor = ms_inputs->at(i);
      MS_ASSERT(cur_tensor != nullptr);
      size_t size;
      char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
      std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin";
      char *bin_buf = ReadFile(file_name.c_str(), &size);
      if (bin_buf == nullptr) {
        MS_LOG(ERROR) << "ReadFile return nullptr";
        return RET_ERROR;
@@ -158,94 +152,12 @@ int NetTrain::ReadInputFile() {
  return RET_OK;
}
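Note the new input-file convention established here: a single --inDataFile value is now treated as a prefix and expanded to <prefix>1.bin, <prefix>2.bin, ... per model input, which is what the updated run scripts above pass (`..._input` instead of a comma-separated list). A tiny sketch of the expansion (the prefix is an assumed example):

```cpp
#include <cstdio>
#include <string>

// Sketch: how one --inDataFile prefix maps to per-input files.
int main() {
  const std::string prefix = "mini_alexnet_input";  // assumed model prefix
  const size_t num_inputs = 2;
  for (size_t i = 0; i < num_inputs; i++) {
    std::string file_name = prefix + std::to_string(i + 1) + ".bin";
    std::printf("%s\n", file_name.c_str());  // mini_alexnet_input1.bin, _input2.bin
  }
  return 0;
}
```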
int NetTrain::CompareOutput() {
  std::cout << "================ Comparing Output data ================" << std::endl;
  float total_bias = 0;
  int total_size = 0;
  bool has_error = false;
  auto tensors_list = session_->GetOutputs();
  if (tensors_list.empty()) {
    MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
    return RET_ERROR;
  }
  mindspore::tensor::MSTensor *tensor = nullptr;
  int i = 1;
  for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
    tensor = session_->GetOutputByTensorName(it->first);
    std::cout << "output is tensor " << it->first << "\n";
    auto outputs = tensor->MutableData();
    size_t size;
    std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
    auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
    if (bin_buf == nullptr) {
      MS_LOG(ERROR) << "ReadFile return nullptr";
      return RET_ERROR;
    }
    if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
      if (static_cast<int>(size / sizeof(float)) != tensor->ElementsNum()) {
        MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
                      << ", read size: " << size / sizeof(float);
        return RET_ERROR;
      }
    } else {
      if (size != tensor->Size()) {
        MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
                      << ", read size: " << size;
        return RET_ERROR;
      }
    }
    float bias = 0.f;
    if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
#ifdef ENABLE_FP16
      bias = CompareData<float16_t>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float16_t *>(outputs));
#endif
    } else {
      bias = CompareData<float>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float *>(outputs));
    }
    if (bias >= 0) {
      total_bias += bias;
      total_size++;
    } else {
      has_error = true;
      break;
    }
    i++;
    delete[] bin_buf;
  }
  if (!has_error) {
    float mean_bias;
    if (total_size != 0) {
      mean_bias = total_bias / total_size * 100;
    } else {
      mean_bias = 0;
    }
    std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%"
              << " threshold is:" << this->flags_->accuracy_threshold_ << std::endl;
    std::cout << "=======================================================" << std::endl << std::endl;
    if (mean_bias > this->flags_->accuracy_threshold_) {
      MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
      std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
      return RET_ERROR;
    } else {
      return RET_OK;
    }
  } else {
    MS_LOG(ERROR) << "Error in CompareData";
    std::cerr << "Error in CompareData" << std::endl;
    std::cout << "=======================================================" << std::endl << std::endl;
    return RET_ERROR;
  }
}
int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session) {
int NetTrain::CompareOutput(const session::LiteSession &lite_session) {
  std::cout << "================ Comparing Forward Output data ================" << std::endl;
  float total_bias = 0;
  int total_size = 0;
  bool has_error = false;
  auto tensors_list = lite_session->GetOutputs();
  auto tensors_list = lite_session.GetOutputs();
  if (tensors_list.empty()) {
    MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
    return RET_ERROR;
@@ -253,9 +165,9 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
  mindspore::tensor::MSTensor *tensor = nullptr;
  int i = 1;
  for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
    tensor = lite_session->GetOutputByTensorName(it->first);
    tensor = lite_session.GetOutputByTensorName(it->first);
    std::cout << "output is tensor " << it->first << "\n";
    auto outputs = tensor->MutableData();
    auto outputs = tensor->data();
    size_t size;
    std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
    auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
@@ -307,7 +219,7 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
  }
}
int NetTrain::MarkPerformance() {
int NetTrain::MarkPerformance(session::TrainSession *session) {
  MS_LOG(INFO) << "Running train loops...";
  std::cout << "Running train loops..." << std::endl;
  uint64_t time_min = 0xFFFFFFFFFFFFFFFF;
@@ -315,10 +227,10 @@
  uint64_t time_avg = 0;

  for (int i = 0; i < flags_->epochs_; i++) {
    session_->BindThread(true);
    session->BindThread(true);
    auto start = GetTimeUs();
    auto status =
      flags_->time_profiling_ ? session_->RunGraph(before_call_back_, after_call_back_) : session_->RunGraph();
      flags_->time_profiling_ ? session->RunGraph(before_call_back_, after_call_back_) : session->RunGraph();
    if (status != 0) {
      MS_LOG(ERROR) << "Inference error " << status;
      std::cerr << "Inference error " << status;
@@ -330,7 +242,7 @@
    time_min = std::min(time_min, time);
    time_max = std::max(time_max, time);
    time_avg += time;
    session_->BindThread(false);
    session->BindThread(false);
  }

  if (flags_->time_profiling_) {
@@ -352,10 +264,9 @@
  return RET_OK;
}

int NetTrain::MarkAccuracy() {
int NetTrain::MarkAccuracy(session::LiteSession *session) {
  MS_LOG(INFO) << "MarkAccuracy";
  std::cout << "MarkAccuracy" << std::endl;
  for (auto &msInput : ms_inputs_) {
  for (auto &msInput : session->GetInputs()) {
    switch (msInput->data_type()) {
      case TypeId::kNumberTypeFloat:
        PrintInputData<float>(msInput);
@@ -371,16 +282,14 @@
        return RET_ERROR;
    }
  }
  session_->Eval();
  auto status = session_->RunGraph(before_call_back_, after_call_back_);
  auto status = session->RunGraph();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Inference error " << status;
    std::cerr << "Inference error " << status << std::endl;
    return status;
  }

  status = CompareOutput();
  status = CompareOutput(*session);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Compare output error " << status;
    std::cerr << "Compare output error " << status << std::endl;
@@ -388,263 +297,107 @@
  }
  return RET_OK;
}
int NetTrain::MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session) {
  MS_LOG(INFO) << "MarkAccuracy";
  std::cout << "MarkAccuracy" << std::endl;
  for (auto &msInput : ms_inputs_) {
    switch (msInput->data_type()) {
      case TypeId::kNumberTypeFloat:
        PrintInputData<float>(msInput);
        break;
      case TypeId::kNumberTypeFloat32:
        PrintInputData<float>(msInput);
        break;
      case TypeId::kNumberTypeInt32:
        PrintInputData<int>(msInput);
        break;
      default:
        MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
        return RET_ERROR;
    }
  }
  auto status = lite_session->RunGraph();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Inference error " << status;
    std::cerr << "Inference error " << status << std::endl;
    return status;
  }
  status = CompareOutputLite(lite_session);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Compare output error " << status;
    std::cerr << "Compare output error " << status << std::endl;
    return status;
static CpuBindMode FlagToBindMode(int flag) {
  if (flag == 2) {
    return MID_CPU;
  }
  return RET_OK;
  if (flag == 1) {
    return HIGHER_CPU;
  }
  return NO_BIND;
}
int NetTrain::RunExportedNet() {
int NetTrain::CreateAndRunNetwork(const std::string &filename, int train_session, int epochs) {
  auto start_prepare_time = GetTimeUs();
  // Load graph
  std::string model_name = flags_->export_file_.substr(flags_->export_file_.find_last_of(DELIM_SLASH) + 1);
  MS_LOG(INFO) << "start reading exported model file";
  std::cout << "start reading exported model file" << std::endl;
  auto context = std::make_shared<Context>();
  if (context == nullptr) {
    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->cpu_bind_mode_ == 2) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
  } else if (flags_->cpu_bind_mode_ == 1) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
  } else {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
  }
  context->thread_num_ = flags_->num_threads_;
  auto *model = mindspore::lite::Model::Import(flags_->export_file_.c_str());
  std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1);
  Context context;
  context.device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = FlagToBindMode(flags_->cpu_bind_mode_);
  context.device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
  context.device_list_[0].device_type_ = mindspore::lite::DT_CPU;
  context.thread_num_ = flags_->num_threads_;
  MS_LOG(INFO) << "start reading model file " << filename.c_str();
  std::cout << "start reading model file " << filename.c_str() << std::endl;
  auto *model = mindspore::lite::Model::Import(filename.c_str());
  if (model == nullptr) {
    MS_LOG(ERROR) << "create model for train session failed";
    return RET_ERROR;
  }
  session_ = session::TrainSession::CreateSession(model, context.get());
  if (session_ == nullptr) {
    MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->loss_name_ != "") {
    session_->SetLossName(flags_->loss_name_);
  }
  ms_inputs_ = session_->GetInputs();
  auto end_prepare_time = GetTimeUs();
  MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
  std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
  // Load input
  MS_LOG(INFO) << "start generate input data";
  auto status = LoadInput();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Generate input data error";
    return status;
  }
  if (!flags_->data_file_.empty()) {
    MS_LOG(INFO) << "Check accuracy for exported model";
    std::cout << "Check accuracy for exported model " << std::endl;
    status = MarkAccuracy();
    for (auto &data : data_) {
      data.second->shape.clear();
      data.second->data.clear();
      delete data.second;
    }
    data_.clear();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
      std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
      return status;
  session::LiteSession *session = nullptr;
  session::TrainSession *t_session = nullptr;
  if (train_session) {
    t_session = session::TrainSession::CreateSession(model, &context);
    if (t_session == nullptr) {
      MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
      std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
      delete model;
      return RET_ERROR;
    }
  }
  return RET_OK;
}
int NetTrain::RunExportedNetLite(std::string file_name) {
  auto start_prepare_time = GetTimeUs();
  // Load graph
  std::string model_name = file_name.substr(file_name.find_last_of(DELIM_SLASH) + 1);
  MS_LOG(INFO) << "start reading exported model file";
  std::cout << "reading " << file_name << std::endl;
  auto context = std::make_shared<Context>();
  if (context == nullptr) {
    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->cpu_bind_mode_ == 2) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
  } else if (flags_->cpu_bind_mode_ == 1) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
    if (flags_->loss_name_ != "") {
      t_session->SetLossName(flags_->loss_name_);
    }
    if (epochs > 0) {
      t_session->Train();
    }
    session = t_session;
  } else {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
  }
  context->thread_num_ = flags_->num_threads_;
  auto *model = mindspore::lite::Model::Import(file_name.c_str());
  if (model == nullptr) {
    MS_LOG(ERROR) << "create model for lite session failed";
    return RET_ERROR;
  }
  auto lite_session = std::unique_ptr<session::LiteSession>(session::LiteSession::CreateSession(context.get()));
  if (lite_session == nullptr) {
    MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (lite_session->CompileGraph(model) != RET_OK) {
    MS_LOG(ERROR) << "Cannot compile model";
    delete model;
    return RET_ERROR;
  }
  ms_inputs_ = lite_session->GetInputs();
  auto end_prepare_time = GetTimeUs();
  MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
  std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
  // Load input
  MS_LOG(INFO) << "start generate input data";
  auto status = LoadInput();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Generate input data error";
    delete model;
    return status;
  }
  if (!flags_->data_file_.empty()) {
    MS_LOG(INFO) << "Check accuracy for exported model";
    std::cout << "Check accuracy for exported model " << std::endl;
    status = MarkAccuracyLite(lite_session);
    for (auto &data : data_) {
      data.second->shape.clear();
      data.second->data.clear();
      delete data.second;
    }
    data_.clear();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
      std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
    session = session::LiteSession::CreateSession(&context);
    if (session == nullptr) {
      MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
      std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
      delete model;
      return status;
      return RET_ERROR;
    }
    if (session->CompileGraph(model) != RET_OK) {
      MS_LOG(ERROR) << "Cannot compile model";
      delete model;
      return RET_ERROR;
    }
    delete model;
  }
  delete model;
  return RET_OK;
}
int NetTrain::RunNetTrain() {
  auto start_prepare_time = GetTimeUs();
  // Load graph
  std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
  MS_LOG(INFO) << "start reading model file";
  std::cout << "start reading model file" << std::endl;
  auto context = std::make_shared<Context>();
  if (context == nullptr) {
    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->cpu_bind_mode_ == 2) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
  } else if (flags_->cpu_bind_mode_ == 1) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
  } else {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
  }
  context->device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
  layer_checksum_ = flags_->layer_checksum_;
  context->thread_num_ = flags_->num_threads_;
  auto *model = mindspore::lite::Model::Import(flags_->model_file_.c_str());
  if (model == nullptr) {
    MS_LOG(ERROR) << "create model for train session failed";
    return RET_ERROR;
  }
  session_ = session::TrainSession::CreateSession(model, context.get());
  if (session_ == nullptr) {
    MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
    std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->loss_name_ != "") {
    session_->SetLossName(flags_->loss_name_);
  }
  session_->Train();
  ms_inputs_ = session_->GetInputs();
  auto end_prepare_time = GetTimeUs();
  MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
  std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
  // Load input
  MS_LOG(INFO) << "start generate input data";
  auto status = LoadInput();
  MS_LOG(INFO) << "Load input data";
  auto ms_inputs = session->GetInputs();
  auto status = LoadInput(&ms_inputs);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Generate input data error";
    MS_LOG(ERROR) << "Load input data error";
    return status;
  }
  if (flags_->epochs_ > 0) {
    status = MarkPerformance();
  if ((epochs > 0) && (t_session != nullptr)) {
    status = MarkPerformance(t_session);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
      std::cout << "Run MarkPerformance error: " << status << std::endl;
      return status;
    }
    SaveModels(t_session, model);  // save file if flags are on
  }
  if (!flags_->data_file_.empty()) {
    status = MarkAccuracy();
    for (auto &data : data_) {
      data.second->shape.clear();
      data.second->data.clear();
      delete data.second;
    if (t_session != nullptr) {
      t_session->Eval();
    }
    data_.clear();
    status = MarkAccuracy(session);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
      std::cout << "Run MarkAccuracy error: " << status << std::endl;
      return status;
    }
  }
  status = CheckExecute(model);
  return RET_OK;
}

int NetTrain::RunNetTrain() {
  CreateAndRunNetwork(flags_->model_file_, true, flags_->epochs_);
  auto status = CheckExecutionOfSavedModels();  // re-initialize sessions according to flags
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Run CheckExecute error: " << status;
    std::cout << "Run CheckExecute error: " << status << std::endl;
@@ -653,8 +406,7 @@ int NetTrain::RunNetTrain() {
  return RET_OK;
}
int NetTrain::CheckExecute(mindspore::lite::Model *model) {
  int status;
int NetTrain::SaveModels(session::TrainSession *session, mindspore::lite::Model *model) {
  if (!flags_->export_file_.empty()) {
    auto ret = Model::Export(model, flags_->export_file_.c_str());
    if (ret != RET_OK) {
@@ -662,67 +414,39 @@ int NetTrain::CheckExecute(mindspore::lite::Model *model) {
      std::cout << "Run SaveToFile error";
      return RET_ERROR;
    }
    delete session_;
    session_ = nullptr;
    status = RunExportedNet();
  }
  if (!flags_->inference_file_.empty()) {
    auto tick = GetTimeUs();
    auto status = session->ExportInference(flags_->inference_file_);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run Exported model error: " << status;
      std::cout << "Run Exported model error: " << status << std::endl;
      MS_LOG(ERROR) << "Save model error: " << status;
      std::cout << "Save model error: " << status << std::endl;
      return status;
    }
  } else {
    if (!flags_->inference_file_.empty()) {
      auto tick = GetTimeUs();
      status = session_->ExportInference(flags_->inference_file_);
      if (status != RET_OK) {
        MS_LOG(ERROR) << "Save model error: " << status;
        std::cout << "Save model error: " << status << std::endl;
        return status;
      }
      std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
      delete session_;
      session_ = nullptr;
      status = RunExportedNetLite(flags_->inference_file_ + ".ms");
      if (status != RET_OK) {
        MS_LOG(ERROR) << "Running saved model error: " << status;
        std::cout << "Running saved model error: " << status << std::endl;
        return status;
      }
    }
    std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
  }
  return RET_OK;
}
| void NetTrainFlags::InitInputDataList() { | |||
| char *saveptr1 = nullptr; | |||
| char *input_list = new char[this->in_data_file_.length() + 1]; | |||
| snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str()); | |||
| const char *split_c = ","; | |||
| char *cur_input = strtok_r(input_list, split_c, &saveptr1); | |||
| while (cur_input != nullptr) { | |||
| input_data_list_.emplace_back(cur_input); | |||
| cur_input = strtok_r(nullptr, split_c, &saveptr1); | |||
| } | |||
| delete[] input_list; | |||
| } | |||
| void NetTrainFlags::InitResizeDimsList() { | |||
| std::string content; | |||
| content = this->resize_dims_in_; | |||
| std::vector<int64_t> shape; | |||
| auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); | |||
| for (const auto &shape_str : shape_strs) { | |||
| shape.clear(); | |||
| auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); | |||
| std::cout << "Resize Dims: "; | |||
| for (const auto &dim_str : dim_strs) { | |||
| std::cout << dim_str << " "; | |||
| shape.emplace_back(static_cast<int64_t>(std::stoi(dim_str))); | |||
| } | |||
| std::cout << std::endl; | |||
| this->resize_dims_.emplace_back(shape); | |||
| } | |||
| } | |||
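For context, the flag parsed above uses DELIM_COLON between shapes and DELIM_COMMA between dimensions. A standalone illustration of the format (flag value hypothetical):

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// "1,32,32,3:1,10" -> {{1, 32, 32, 3}, {1, 10}}: one shape per input tensor.
int main() {
  const std::string flag = "1,32,32,3:1,10";
  std::vector<std::vector<int64_t>> dims;
  std::stringstream shapes(flag);
  std::string shape_str;
  while (std::getline(shapes, shape_str, ':')) {
    std::vector<int64_t> shape;
    std::stringstream ss(shape_str);
    std::string dim;
    while (std::getline(ss, dim, ',')) shape.push_back(std::stoll(dim));
    dims.push_back(shape);
  }
  std::cout << dims.size() << " shapes, first has " << dims[0].size() << " dims\n";
  return 0;
}
```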
| int NetTrain::CheckExecutionOfSavedModels() { | |||
| int status = RET_OK; | |||
| if (!flags_->export_file_.empty()) { | |||
| status = NetTrain::CreateAndRunNetwork(flags_->export_file_, true, 0); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status; | |||
| std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl; | |||
| return status; | |||
| } | |||
| } | |||
| if (!flags_->inference_file_.empty()) { | |||
| status = NetTrain::CreateAndRunNetwork(flags_->inference_file_ + ".ms", false, 0); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status; | |||
| std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl; | |||
| return status; | |||
| } | |||
| } | |||
| return status; | |||
| } | |||
| int NetTrain::InitCallbackParameter() { | |||
| @@ -766,7 +490,7 @@ int NetTrain::InitCallbackParameter() { | |||
| op_times_by_type_[call_param.node_type].second += cost; | |||
| op_times_by_name_[call_param.node_name].first++; | |||
| op_times_by_name_[call_param.node_name].second += cost; | |||
| - if (layer_checksum_) { | |||
| + if (flags_->layer_checksum_) { | |||
| auto out_tensor = after_outputs.at(0); | |||
| void *output = out_tensor->MutableData(); | |||
| int tensor_size = out_tensor->ElementsNum(); | |||
| @@ -841,13 +565,6 @@ int NetTrain::Init() { | |||
| std::cerr << "modelPath is required" << std::endl; | |||
| return 1; | |||
| } | |||
| flags_->InitInputDataList(); | |||
| flags_->InitResizeDimsList(); | |||
| if (!flags_->resize_dims_.empty() && flags_->resize_dims_.size() != flags_->input_data_list_.size()) { | |||
| MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; | |||
| std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; | |||
| return RET_ERROR; | |||
| } | |||
| if (flags_->time_profiling_) { | |||
| auto status = InitCallbackParameter(); | |||
| @@ -925,14 +642,6 @@ int NetTrain::PrintResult(const std::vector<std::string> &title, | |||
| return RET_OK; | |||
| } | |||
| NetTrain::~NetTrain() { | |||
| for (auto iter : this->data_) { | |||
| delete (iter.second); | |||
| } | |||
| this->data_.clear(); | |||
| if (session_ != nullptr) delete (session_); | |||
| } | |||
| int RunNetTrain(int argc, const char **argv) { | |||
| NetTrainFlags flags; | |||
| Option<std::string> err = flags.ParseFlags(argc, argv); | |||
| @@ -42,15 +42,6 @@ enum MS_API DataType { kImage = 0, kBinary = 1 }; | |||
| constexpr float relativeTolerance = 1e-5; | |||
| constexpr float absoluteTolerance = 1e-8; | |||
| struct MS_API CheckTensor { | |||
| CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) { | |||
| this->shape = shape; | |||
| this->data = data; | |||
| } | |||
| std::vector<size_t> shape; | |||
| std::vector<float> data; | |||
| }; | |||
| template <typename T> | |||
| float TensorSum(void *data, int size) { | |||
| T *typed_data = reinterpret_cast<T *>(data); | |||
| @@ -84,10 +75,6 @@ class MS_API NetTrainFlags : public virtual FlagParser { | |||
| ~NetTrainFlags() override = default; | |||
| void InitInputDataList(); | |||
| void InitResizeDimsList(); | |||
| public: | |||
| // common | |||
| std::string model_file_; | |||
| @@ -118,25 +105,22 @@ class MS_API NetTrainFlags : public virtual FlagParser { | |||
| class MS_API NetTrain { | |||
| public: | |||
| explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {} | |||
| - virtual ~NetTrain(); | |||
| + virtual ~NetTrain() = default; | |||
| int Init(); | |||
| int RunNetTrain(); | |||
| - int RunExportedNet(); | |||
| private: | |||
| // call GenerateInputData or ReadInputFile to init inputTensors | |||
| - int LoadInput(); | |||
| + int LoadInput(Vector<tensor::MSTensor *> *ms_inputs); | |||
| // call GenerateRandomData to fill inputTensors | |||
| - int GenerateInputData(); | |||
| + int GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs); | |||
| int GenerateRandomData(size_t size, void *data); | |||
| - int ReadInputFile(); | |||
| - int CompareOutput(); | |||
| + int ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs); | |||
| + int CreateAndRunNetwork(const std::string &filename, int train_session, int epochs); | |||
| int InitCallbackParameter(); | |||
| @@ -208,22 +192,13 @@ class MS_API NetTrain { | |||
| return meanError; | |||
| } | |||
| - int MarkPerformance(); | |||
| - int MarkAccuracy(); | |||
| + int MarkPerformance(session::TrainSession *session); | |||
| private: | |||
| - int RunExportedNetLite(std::string file_name); | |||
| - int MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session); | |||
| - int CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session); | |||
| - int CheckExecute(mindspore::lite::Model *model); | |||
| + int MarkAccuracy(session::LiteSession *lite_session); | |||
| + int CompareOutput(const session::LiteSession &lite_session); | |||
| + int SaveModels(session::TrainSession *session, mindspore::lite::Model *model); | |||
| int CheckExecutionOfSavedModels(); | |||
| NetTrainFlags *flags_; | |||
| - session::TrainSession *session_ = nullptr; | |||
| - std::vector<mindspore::tensor::MSTensor *> ms_inputs_; | |||
| - std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_; | |||
| - std::unordered_map<std::string, CheckTensor *> data_; | |||
| std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat}, | |||
| {"INT32", TypeId::kNumberTypeInt32}}; | |||
| // callback parameters | |||
| uint64_t op_begin_ = 0; | |||
| @@ -234,7 +209,6 @@ class MS_API NetTrain { | |||
| mindspore::KernelCallBack before_call_back_; | |||
| mindspore::KernelCallBack after_call_back_; | |||
| - bool layer_checksum_ = false; | |||
| }; | |||
| int MS_API RunNetTrain(int argc, const char **argv); | |||
| @@ -143,7 +143,7 @@ bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_ | |||
| tensor->data.resize(new_data_str.size()); | |||
| tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING; | |||
| MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING"; | |||
| MS_LOG(DEBUG) << "set WeightQunatCompressType_INDEXING"; | |||
| return true; | |||
| } | |||
| @@ -285,21 +285,21 @@ bool PackRepetition(size_t bit_num, schema::TensorT *tensor) { | |||
| auto pack_sparsity_size_in_bit = | |||
| 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit; | |||
| size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0); | |||
| MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte | |||
| MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte | |||
| << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte; | |||
| auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte}); | |||
| if (min_byte_need == origin_size_in_byte) { | |||
| return false; | |||
| } else if (min_byte_need == pack_repetition_size_in_byte) { | |||
| MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte; | |||
| MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte; | |||
| return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, | |||
| pack_repetition_size_in_byte, bit_num, tensor); | |||
| } else if (min_byte_need == pack_sparsity_size_in_byte) { | |||
| MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte; | |||
| MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte; | |||
| return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, | |||
| pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor); | |||
| } else { | |||
| MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " " | |||
| MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " " | |||
| << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}"; | |||
| } | |||
| return false; | |||
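To make the three-way comparison above concrete, here is a minimal numeric check of the sparsity-size estimate (all counts hypothetical):

```cpp
#include <cmath>
#include <cstdio>

// Mirrors the expression above: 1*8 + 4*8 + bit_num + bit_num*unique_value_cnt
//   + unique_value_bit*nz_cnt + nz_cnt*coor_best_bit, then rounded up to bytes.
int main() {
  const int bit_num = 8, unique_value_cnt = 16, unique_value_bit = 4;
  const int nz_cnt = 100, coor_best_bit = 7;
  const double bits = 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt +
                      unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
  // 8 + 32 + 8 + 128 + 400 + 700 = 1276 bits -> ceil(1276 / 8) = 160 bytes.
  std::printf("%d bytes\n", static_cast<int>(std::ceil(bits / 8.0)));
  return 0;
}
```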
| @@ -22,6 +22,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/graphdef_transform.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/optimizer.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/quant_utils.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/graph_util.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/node_util.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc | |||
| @@ -14,14 +14,16 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h" | |||
| #include <vector> | |||
| #include <cmath> | |||
| #include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h" | |||
| #include <algorithm> | |||
| #include "tools/converter/converter_context.h" | |||
| #include "tools/converter/quantizer/quantize_util.h" | |||
| #include "tools/common/tensor_util.h" | |||
| #include "tools/common/graph_util.h" | |||
| #include "tools/common/node_util.h" | |||
| #include "src/common/quant_utils.h" | |||
| namespace mindspore::lite { | |||
| namespace { | |||
| @@ -49,7 +51,7 @@ STATUS ComputeDataToInt8(const std::unique_ptr<TensorT> &tensor, int32_t index) | |||
| return RET_OK; | |||
| } | |||
| for (size_t j = 0; j < wShapeSize; j++) { | |||
| - qDatas[j] = quant::QuantizeData<int8_t>(weightData[j], weightQauntParam.get()); | |||
| + qDatas[j] = QuantizeData<int8_t>(weightData[j], weightQauntParam.get()); | |||
| } | |||
| } else { // convert uint8 to int8 | |||
| auto *weightData = static_cast<uint8_t *>(oriWeightData); | |||
| @@ -141,7 +143,7 @@ STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, co | |||
| auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data()); | |||
| dst_data_int32[index] = quant_data; | |||
| } else { | |||
| - auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get()); | |||
| + auto quant_data = QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get()); | |||
| dst_data[index] = quant_data; | |||
| } | |||
| } | |||
| @@ -44,6 +44,7 @@ | |||
| #include "securec/include/securec.h" | |||
| #include "tools/common/tensor_util.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "src/common/quant_utils.h" | |||
| #include "src/common/utils.h" | |||
| #include "tools/converter/quantizer/weight_quantizer.h" | |||
| @@ -1282,8 +1283,7 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| return status; | |||
| } | |||
| - if (calibrator_->config_param_.mixed) { | |||
| - // get opname_bit map | |||
| + if (calibrator_->config_param_.mixed) { // get opname_bit map | |||
| auto weight_quant_func_graph = CopyFuncGraph(func_graph); | |||
| if (weight_quant_func_graph == nullptr) { | |||
| MS_LOG(ERROR) << "CopyFuncGraph error"; | |||
| @@ -1315,7 +1315,6 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| MS_LOG(ERROR) << "create session failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(INFO) << "start to update divergence's max value"; | |||
| status = DoInference(); | |||
| if (status != RET_OK) { | |||
| @@ -1363,14 +1362,12 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| MS_LOG(ERROR) << "create session failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(INFO) << "do bias correction"; | |||
| status = BiasCorrection(func_graph); | |||
| if (status != RET_OK) { | |||
| MS_LOG(WARNING) << "BiasCorrection failed."; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -1477,7 +1474,7 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) { | |||
| quant_param_t.scale = quant_params[0].scale; | |||
| quant_param_t.zeroPoint = quant_params[0].zeroPoint; | |||
| for (auto float_data : fp32_op_input) { | |||
| - auto quant_data = QuantizeData<int8_t>(float_data, quant_param_t, quant_max, quant_min); | |||
| + auto quant_data = QuantizeData<int8_t>(float_data, &quant_param_t, quant_max, quant_min); | |||
| quant_datas.push_back(quant_data); | |||
| } | |||
| @@ -100,12 +100,12 @@ bool QuantStrategy::CanConvOpQuantized(const CNodePtr &node) const { | |||
| return true; | |||
| } | |||
| - bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const { | |||
| + bool QuantStrategy::CanOpPostQuantized(const AnfNodePtr &node) const { | |||
| MS_ASSERT(node != nullptr); | |||
| if (!node->isa<mindspore::CNode>()) { | |||
| return false; | |||
| } | |||
| - auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node); | |||
| + const auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node); | |||
| auto type = NodePrimitiveType(cnode); | |||
| static const std::vector<std::string> int8OpList = { | |||
| ops::kNameAddFusion, ops::kNameActivation, ops::kNameAvgPoolFusion, | |||
| @@ -268,67 +268,6 @@ bool TensorQuantParamsInited(const schema::TensorT &tensor) { | |||
| return true; | |||
| } | |||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | |||
| int quant_min, int num_bits) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| if (mMin > 0.0f) { | |||
| MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision"; | |||
| mMin = 0.0f; | |||
| } | |||
| if (mMax < 0.0f) { | |||
| MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0, this may course low precision"; | |||
| mMax = 0.0f; | |||
| } | |||
| if (mMin > mMax) { | |||
| MS_LOG(ERROR) << "cal error while min" << mMin << ">" << mMax; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| if (mMin == mMax) { | |||
| if (mMin != 0.0f) { | |||
| MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other"; | |||
| return RET_ERROR; | |||
| } | |||
| quantParam->inited = true; | |||
| quantParam->min = mMin; | |||
| quantParam->max = mMax; | |||
| quantParam->scale = 0.0f; | |||
| quantParam->zeroPoint = 0; | |||
| quantParam->narrowRange = narrowRange; | |||
| quantParam->numBits = num_bits; | |||
| return RET_OK; | |||
| } | |||
| auto quantMinFloat = static_cast<double>(quant_min); | |||
| auto quantMaxFloat = static_cast<double>(quant_max); | |||
| if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) { | |||
| MS_LOG(ERROR) << "divisor cannot be 0"; | |||
| return RET_ERROR; | |||
| } | |||
| double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat); | |||
| if (fabs(scale) <= 0.0f) { | |||
| MS_LOG(ERROR) << "divisor 'scale' cannot be 0"; | |||
| return RET_ERROR; | |||
| } | |||
| const double zeroPointFromMin = quantMinFloat - mMin / scale; | |||
| int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin)); | |||
| if (scale < SCALE_THREASHOLD) { | |||
| zeroPoint = 0; | |||
| } | |||
| // The zero point should always be in the range of quantized value, | |||
| // [qmin, qmax]. | |||
| MS_ASSERT(zeroPoint >= quant_min); | |||
| MS_ASSERT(zeroPoint <= quant_max); | |||
| quantParam->inited = true; | |||
| quantParam->min = mMin; | |||
| quantParam->max = mMax; | |||
| quantParam->scale = scale; | |||
| quantParam->zeroPoint = zeroPoint; | |||
| quantParam->narrowRange = narrowRange; | |||
| quantParam->numBits = num_bits; | |||
| return RET_OK; | |||
| } | |||
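The function removed here (it now lives in src/common/quant_utils.cc, per the new file added earlier in this PR) computes standard affine-quantization parameters. A self-contained worked example, assuming a signed 8-bit range:

```cpp
#include <cmath>
#include <cstdio>

// scale = (max - min) / (qmax - qmin); zeroPoint = round(qmin - min / scale).
// Here the float range [-1.0, 3.0] is mapped onto [-128, 127].
int main() {
  const double mMin = -1.0, mMax = 3.0;
  const int quant_min = -128, quant_max = 127;
  const double scale = (mMax - mMin) / (quant_max - quant_min);                   // ~0.01569
  const int zeroPoint = static_cast<int>(std::round(quant_min - mMin / scale));   // -64
  // Round trip: dequant(q) = (q - zeroPoint) * scale recovers ~[-1.004, 2.996].
  std::printf("scale=%.5f zeroPoint=%d range=[%.3f, %.3f]\n", scale, zeroPoint,
              (quant_min - zeroPoint) * scale, (quant_max - zeroPoint) * scale);
  return 0;
}
```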
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int numBits) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| if (mMin > 0.0f) { | |||
| @@ -999,26 +938,6 @@ STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_data | |||
| return RET_OK; | |||
| } | |||
| void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas, | |||
| bool channel_at_first, float *desired_max, float *desired_min) { | |||
| float min = FLT_MAX; | |||
| float max = -FLT_MAX; | |||
| // find min and max | |||
| for (int j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| if (index >= elem_count) { | |||
| MS_LOG(ERROR) << "over flow!"; | |||
| } | |||
| min = std::min(min, raw_datas[index]); | |||
| max = std::max(max, raw_datas[index]); | |||
| } | |||
| *desired_max = max; | |||
| *desired_min = min; | |||
| } | |||
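The two index patterns in the loop above are easy to misread; a standalone walk over a toy buffer with channels=2 and one_filter_size=3:

```cpp
#include <cstdio>

// channel_at_first: layout [c0 c0 c0 c1 c1 c1] -> index = j + i * one_filter_size
// otherwise:        layout [c0 c1 c0 c1 c0 c1] -> index = j * channels + i
int main() {
  const int channels = 2, one_filter_size = 3;
  for (int at_first = 1; at_first >= 0; --at_first) {
    std::printf("channel_at_first=%d\n", at_first);
    for (int i = 0; i < channels; ++i) {
      std::printf("  channel %d indices:", i);
      for (int j = 0; j < one_filter_size; ++j) {
        std::printf(" %d", at_first ? j + i * one_filter_size : j * channels + i);
      }
      std::printf("\n");
    }
  }
  return 0;
}
```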
| int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first) { | |||
| auto channels = dims[0]; | |||
| if (!(*channel_at_first)) { | |||
| @@ -43,6 +43,7 @@ | |||
| #include "src/lite_session.h" | |||
| #include "tools/converter/graphdef_transform.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "src/common/quant_utils.h" | |||
| namespace mindspore::lite::quant { | |||
| static constexpr size_t UINT8_QUANTIZATION = 8; | |||
| @@ -82,7 +83,7 @@ class QuantStrategy { | |||
| bool CanConvOpQuantized(const CNodePtr &node) const; | |||
| bool CanMulOpQuantized(const CNodePtr &node) const; | |||
| - bool CanOpPostQuantized(AnfNodePtr &node) const; | |||
| + bool CanOpPostQuantized(const AnfNodePtr &node) const; | |||
| bool CanTensorQuantized(const AnfNodePtr &inputNode) const; | |||
| size_t m_weight_size_; | |||
| @@ -100,9 +101,6 @@ constexpr int quant_param_size = 32 * 8; | |||
| QuantParamHolderPtr GetCNodeQuantHolder(const PrimitivePtr &primitive); | |||
| - STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | |||
| - int quant_min, int num_bits); | |||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, | |||
| int numBits = UINT8_QUANTIZATION); | |||
| @@ -112,9 +110,6 @@ std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epoc | |||
| STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_datas, int new_size, TypeId new_data_type); | |||
| void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas, | |||
| bool channel_at_first, float *desired_max, float *desired_min); | |||
| int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first); | |||
| void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first, | |||
| @@ -123,193 +118,10 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, | |||
| void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index, | |||
| bool *channel_at_first, int *channel_cnt); | |||
| template <typename T> | |||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| MS_ASSERT(quantParam->inited); | |||
| const auto scale = quantParam->scale; | |||
| const auto zeroPoint = quantParam->zeroPoint; | |||
| const auto numBit = quantParam->numBits; | |||
| const auto narrowRange = quantParam->narrowRange; | |||
| double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1); | |||
| const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale; | |||
| double minLimit; | |||
| if (narrowRange) { | |||
| minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale; | |||
| } else { | |||
| minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale; | |||
| } | |||
| return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | |||
| double tmp; | |||
| if (originData > maxLimit) { | |||
| tmp = maxLimit; | |||
| } else if (originData < minLimit) { | |||
| tmp = minLimit; | |||
| } else { | |||
| tmp = originData; | |||
| } | |||
| auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale)); | |||
| return quantData; | |||
| }(); | |||
| } | |||
| template <typename T> | |||
| T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quant_max, int quant_min) { | |||
| MS_ASSERT(quantParam.inited); | |||
| const auto scale = quantParam.scale; | |||
| const int zeroPoint = quantParam.zeroPoint; | |||
| const auto narrowRange = quantParam.narrowRange; | |||
| const int maxLimit = quant_max; | |||
| const int minLimit = quant_min; | |||
| if (scale <= SCALE_THREASHOLD) { | |||
| return 0; | |||
| } | |||
| return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | |||
| auto quant_data = std::round(originData / scale + zeroPoint); | |||
| if (quant_data > maxLimit) { | |||
| quant_data = maxLimit; | |||
| } else if (quant_data < minLimit) { | |||
| quant_data = minLimit; | |||
| } | |||
| return static_cast<T>(quant_data); | |||
| }(); | |||
| } | |||
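A quick standalone sketch of the saturating overload above, reusing the scale and zero-point from the earlier worked example (values assumed):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Same round-then-clamp logic as the template above, outside the schema types.
int8_t QuantizeClamped(float x, double scale, int zp, int qmin, int qmax) {
  const double q = std::round(x / scale + zp);
  return static_cast<int8_t>(std::min<double>(qmax, std::max<double>(qmin, q)));
}

int main() {
  const double scale = 4.0 / 255;  // zeroPoint = -64, int8 range [-128, 127]
  std::printf("%d %d\n", QuantizeClamped(1.0f, scale, -64, -128, 127),    // 0
              QuantizeClamped(10.0f, scale, -64, -128, 127));             // saturates at 127
  return 0;
}
```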
| bool QuantParamEqual(const schema::QuantParamT &quant_param1, const schema::QuantParamT &quant_param2); | |||
| bool TensorQuantParamsInited(const schema::TensorT &tensor); | |||
| template <typename T> | |||
| STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type, | |||
| std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, | |||
| const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, | |||
| std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true, | |||
| int channel_cnt = -1) { | |||
| auto dims = weight->shape(); | |||
| size_t elem_count = weight->DataSize(); | |||
| auto *raw_datas = static_cast<float *>(weight->data_c()); | |||
| auto channels = CalChannels(dims, channel_cnt, &channel_at_first); | |||
| if (channels == 0) { | |||
| MS_LOG(ERROR) << "channels is zero"; | |||
| return RET_ERROR; | |||
| } | |||
| size_t one_filter_size = elem_count / channels; | |||
| bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size; | |||
| if (!do_quant && quant_type == QuantType_WeightQuant) { | |||
| MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size; | |||
| return RET_CONTINUE; | |||
| } | |||
| for (int i = 0; i < channels; i++) { | |||
| float min = FLT_MAX; | |||
| float max = -FLT_MAX; | |||
| GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min); | |||
| schema::QuantParamT quant_param; | |||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||
| return status; | |||
| } | |||
| // do quantization | |||
| double average_dequant = 0; | |||
| double average_raw = 0; | |||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| MS_ASSERT(index < elem_count); | |||
| float raw_data = raw_datas[index]; | |||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||
| (*quant_datas)[index] = quant_data; | |||
| if (quant_type == QuantType_WeightQuant) { | |||
| float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint); | |||
| (*dequant_datas)[index] = dequant_data; | |||
| average_dequant += dequant_data; | |||
| average_raw += raw_data; | |||
| } | |||
| } | |||
| if (quant_type == QuantType_WeightQuant && !k_means) { | |||
| // mean | |||
| average_dequant = average_dequant / one_filter_size; | |||
| average_raw = average_raw / one_filter_size; | |||
| // std | |||
| double variance_dequant = 0; | |||
| double variance_raw = 0; | |||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| MS_ASSERT(index < elem_count); | |||
| variance_dequant += std::pow((*dequant_datas)[index] - average_dequant, 2); | |||
| variance_raw += std::pow(raw_datas[index] - average_raw, 2); | |||
| } | |||
| variance_dequant = std::sqrt(variance_dequant / one_filter_size); | |||
| variance_raw = std::sqrt(variance_raw / one_filter_size); | |||
| quant_param.varCorr = 1; | |||
| if (variance_raw != 0 && variance_dequant != 0) { | |||
| auto temp_var_corr = variance_raw / variance_dequant; | |||
| if (temp_var_corr > 0 && temp_var_corr < 10) { | |||
| quant_param.varCorr = temp_var_corr; | |||
| } else { | |||
| MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr; | |||
| } | |||
| } | |||
| quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr; | |||
| } | |||
| quant_params->emplace_back(quant_param); | |||
| } | |||
| auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
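The varCorr/meanCorr block above is a mean/std bias correction for weight quantization: the dequantized channel is rescaled so its statistics match the float original. A toy illustration of how the factors are derived (data hypothetical):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Raw float weights of one channel and their quantize->dequantize images.
  std::vector<double> raw = {0.1, 0.5, -0.3, 0.9};
  std::vector<double> deq = {0.08, 0.52, -0.28, 0.88};
  auto stats = [](const std::vector<double> &v, double *mean, double *sd) {
    double m = 0.0, s = 0.0;
    for (double x : v) m += x;
    m /= v.size();
    for (double x : v) s += (x - m) * (x - m);
    *mean = m;
    *sd = std::sqrt(s / v.size());
  };
  double mean_raw, sd_raw, mean_deq, sd_deq;
  stats(raw, &mean_raw, &sd_raw);
  stats(deq, &mean_deq, &sd_deq);
  const double var_corr = (sd_deq != 0.0) ? sd_raw / sd_deq : 1.0;  // quant_param.varCorr
  const double mean_corr = mean_raw - mean_deq * var_corr;          // quant_param.meanCorr
  // Applying corrected = var_corr * dequant + mean_corr restores raw's mean/std.
  std::printf("varCorr=%.4f meanCorr=%.4f\n", var_corr, mean_corr);
  return 0;
}
```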
| template <typename T> | |||
| STATUS DoPerLayerQuant(const tensor::TensorPtr &weight, const QuantType &quant_type, | |||
| std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, | |||
| const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, | |||
| TypeId quant_data_type) { | |||
| auto dims = weight->shape(); | |||
| size_t elem_count = weight->DataSize(); | |||
| auto *raw_datas = static_cast<float *>(weight->data_c()); | |||
| float min = FLT_MAX; | |||
| float max = -FLT_MAX; | |||
| for (uint32_t i = 0; i < elem_count; i++) { | |||
| // find max min | |||
| min = std::min(min, raw_datas[i]); | |||
| max = std::max(max, raw_datas[i]); | |||
| } | |||
| schema::QuantParamT quant_param; | |||
| if (!k_means) { | |||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||
| return status; | |||
| } | |||
| } | |||
| quant_params->emplace_back(quant_param); | |||
| // update data and datatype | |||
| for (uint32_t i = 0; i < elem_count; i++) { | |||
| float raw_data = raw_datas[i]; | |||
| if (!k_means) { | |||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||
| (*quant_datas)[i] = quant_data; | |||
| } | |||
| } | |||
| auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| template <typename T> | |||
| STATUS DoBitPack(const tensor::TensorPtr &weight, const size_t &bit_num, const std::vector<T> &quant_datas) { | |||
| if (bit_num != 8 && bit_num != 16) { | |||
| @@ -363,15 +175,19 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv | |||
| } | |||
| std::vector<T> quant_data(elem_count); | |||
| std::vector<float> dequant_datas(elem_count); | |||
| int ret = RET_OK; | |||
| if (per_channel) { | |||
| bool channel_at_first = true; | |||
| int channel_cnt = -1; | |||
| CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt); | |||
| // channel at first | |||
| - ret = DoPerChannelQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data, | |||
| - &dequant_datas, quant_data_type, channel_at_first, channel_cnt); | |||
| + auto channels = CalChannels(dims, channel_cnt, &channel_at_first); | |||
| + if (channels == 0) { | |||
| + MS_LOG(ERROR) << "channels is zero"; | |||
| + return RET_ERROR; | |||
| + } | |||
| + ret = DoPerChannelQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), | |||
| + static_cast<mindspore::schema::QuantType>(quant_type), &quant_params, quant_max, | |||
| + quant_min, bit_num, k_means, &quant_data, channels, channel_at_first); | |||
| if (ret == RET_CONTINUE) { | |||
| return ret; | |||
| } else if (ret != RET_OK) { | |||
| @@ -379,13 +195,18 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv | |||
| return ret; | |||
| } | |||
| } else { | |||
| - ret = DoPerLayerQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data, | |||
| - quant_data_type); | |||
| + ret = DoPerLayerQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), &quant_params, quant_max, | |||
| + quant_min, bit_num, k_means, &quant_data); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Do per layer quant failed."; | |||
| return ret; | |||
| } | |||
| } | |||
| auto status = UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(T), quant_data_type); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| #ifdef HUFFMAN_ENCODE | |||
| auto huffman_encode = std::make_unique<lite::HuffmanEncode>(); | |||