Browse Source

post training quant mixed precision

tags/v1.2.0-rc1
xutianchun 4 years ago
parent
commit
bb1c4e3c6a
19 changed files with 737 additions and 342 deletions
  1. +1
    -1
      mindspore/lite/src/ops/adder.cc
  2. +1
    -1
      mindspore/lite/src/ops/conv2d.cc
  3. +1
    -2
      mindspore/lite/src/ops/deconv2d.cc
  4. +1
    -1
      mindspore/lite/src/ops/depthwise_conv2d.cc
  5. +1
    -1
      mindspore/lite/src/ops/matmul.cc
  6. +1
    -2
      mindspore/lite/src/ops/maximum.cc
  7. +1
    -1
      mindspore/lite/src/ops/maximum_grad.cc
  8. +1
    -1
      mindspore/lite/src/ops/minimum_grad.cc
  9. +4
    -2
      mindspore/lite/src/ops/primitive_c.cc
  10. +1
    -1
      mindspore/lite/tools/converter/anf_transform.cc
  11. +0
    -2
      mindspore/lite/tools/converter/converter.cc
  12. +0
    -1
      mindspore/lite/tools/converter/legacy_optimizer/graph/tensor_name_pass.cc
  13. +0
    -1
      mindspore/lite/tools/converter/quantizer/CMakeLists.txt
  14. +56
    -260
      mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
  15. +10
    -18
      mindspore/lite/tools/converter/quantizer/post_training_quantizer.h
  16. +309
    -2
      mindspore/lite/tools/converter/quantizer/quantize_util.cc
  17. +35
    -0
      mindspore/lite/tools/converter/quantizer/quantize_util.h
  18. +294
    -38
      mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
  19. +20
    -7
      mindspore/lite/tools/converter/quantizer/weight_quantizer.h

+ 1
- 1
mindspore/lite/src/ops/adder.cc View File

@@ -21,7 +21,7 @@
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 1
- 1
mindspore/lite/src/ops/conv2d.cc View File

@@ -24,7 +24,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 1
- 2
mindspore/lite/src/ops/deconv2d.cc View File

@@ -21,8 +21,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>

#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 1
- 1
mindspore/lite/src/ops/depthwise_conv2d.cc View File

@@ -19,7 +19,7 @@
#include <memory>
#include <string>
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif
#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"


+ 1
- 1
mindspore/lite/src/ops/matmul.cc View File

@@ -18,7 +18,7 @@
#include <memory>
#include <utility>
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 1
- 2
mindspore/lite/src/ops/maximum.cc View File

@@ -19,8 +19,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>

#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 1
- 1
mindspore/lite/src/ops/maximum_grad.cc View File

@@ -19,7 +19,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 1
- 1
mindspore/lite/src/ops/minimum_grad.cc View File

@@ -19,7 +19,7 @@
#include "src/common/log_adapter.h"
#ifdef PRIMITIVE_WRITEABLE
#include <float.h>
#include "tools/converter/quantizer/quantize_util.h"
#include "src/param_value_lite.h"
#endif

#ifndef PRIMITIVE_WRITEABLE


+ 4
- 2
mindspore/lite/src/ops/primitive_c.cc View File

@@ -387,7 +387,9 @@ void PrimitiveC::set_input_quant_params(const std::vector<std::vector<schema::Qu
}

void PrimitiveC::set_input_quant_param(const size_t &index, const std::vector<schema::QuantParamT> &input_quant_param) {
MS_ASSERT(index < this->input_quant_param_.size());
if (index >= this->input_quant_param_.size()) {
this->input_quant_param_.resize(index + 1);
}
this->input_quant_param_.at(index) = input_quant_param;
}

@@ -493,7 +495,7 @@ std::shared_ptr<PrimitiveC> GetTupleGetItemPrim() {
}

template <typename T, typename = std::enable_if<std::is_base_of<PrimitiveC, T>::value>>
std::shared_ptr<PrimitiveC> NewPrimitiveC(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
std::shared_ptr<PrimitiveC> NewPrimitiveC(const mindspore::Primitive &prim, const std::vector<AnfNodePtr> &inputs,
const schema::QuantType &quantType) {
auto primc = std::make_shared<T>();
if (primc == nullptr) {


+ 1
- 1
mindspore/lite/tools/converter/anf_transform.cc View File

@@ -204,7 +204,7 @@ FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_grap
ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR);
return nullptr;
}
this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->quantWeightSize,
this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->configFile, config->quantWeightSize,
config->quantWeightChannel, config->bitNum);
if (mQuantizer == nullptr) {
MS_LOG(ERROR) << "New WeightQuantizer failed";


+ 0
- 2
mindspore/lite/tools/converter/converter.cc View File

@@ -32,8 +32,6 @@
#include "tools/anf_exporter/anf_exporter.h"
#include "tools/anf_importer/import_from_mindir.h"
#include "proto/onnx.pb.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/quant_cast.h"
#include "include/version.h"

namespace mindspore {


+ 0
- 1
mindspore/lite/tools/converter/legacy_optimizer/graph/tensor_name_pass.cc View File

@@ -16,7 +16,6 @@

#include "tools/converter/legacy_optimizer/graph/tensor_name_pass.h"
#include "tools/converter/converter_context.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/common/tensor_util.h"

namespace mindspore::lite {


+ 0
- 1
mindspore/lite/tools/converter/quantizer/CMakeLists.txt View File

@@ -6,7 +6,6 @@ include_directories(${3RD_DIR}/opencv/build/include/opencv4)
file(GLOB QUANTIZER
${CMAKE_CURRENT_SOURCE_DIR}/calc_quant_param.cc
${CMAKE_CURRENT_SOURCE_DIR}/quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/aware_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quantize_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc


+ 56
- 260
mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc View File

@@ -39,6 +39,7 @@
#include "tools/common/tensor_util.h"
#include "src/common/file_utils.h"
#include "src/common/utils.h"
#include "tools/converter/quantizer/weight_quantizer.h"

using std::string;
using std::vector;
@@ -380,182 +381,16 @@ STATUS Calibrator::AddQuantizedOp(const CNodePtr &node) {
return RET_OK;
}

void Calibrator::AddImage(const string &file, size_t index) {
if (index >= images_.size()) {
MS_LOG(ERROR) << "images_ size: " << images_.size() << " but index: " << index;
return;
}
auto exist = [](const string &file) {
struct stat buf {};
return stat(file.c_str(), &buf) == 0;
};
if (exist(file)) {
this->images_[index].push_back(file);
} else {
MS_LOG(WARNING) << "invalid image file path: " << file;
}
}

STATUS Calibrator::GenerateInputData(size_t input_index, size_t image_index,
mindspore::tensor::MSTensor *tensor) const {
MS_ASSERT(tensor != nullptr);
if (input_index >= images_.size()) {
MS_LOG(ERROR) << "images_ size: " << images_.size() << " but input_index: " << input_index;
return RET_ERROR;
}
if (image_index >= images_[input_index].size()) {
MS_LOG(ERROR) << "images_[input_index] size: " << images_[input_index].size()
<< " but image_index: " << image_index;
return RET_ERROR;
}
string path = images_[input_index][image_index];
MS_LOG(INFO) << "read image: " << path;
size_t size;
char *bin_buf = ReadFile(path.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_NULL_PTR;
}
auto data = tensor->MutableData();
if (data == nullptr) {
MS_LOG(ERROR) << "Get tensor MutableData return nullptr";
return RET_NULL_PTR;
}
if (size != tensor->Size()) {
MS_LOG(ERROR) << "the input data is not consistent with model input, file_size: " << size
<< " input tensor size: " << tensor->Size();
return RET_ERROR;
}
auto ret = memcpy_s(data, tensor->Size(), bin_buf, size);
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy_s error: " << ret;
delete[] bin_buf;
return RET_ERROR;
}
delete[] bin_buf;
return RET_OK;
return CopyInputDataToTensor(input_index, image_index, images_, tensor);
}

STATUS Calibrator::CollectImages() {
this->images_.resize(config_param_.image_paths.size());
auto input_i = 0;
bool multi_input = config_param_.image_paths.size() > 1;
for (const auto &image_path : config_param_.image_paths) {
DIR *root = opendir(image_path.c_str());
if (root == nullptr) {
MS_LOG(ERROR) << "invalid image path: " << image_path;
return RET_PARAM_INVALID;
}
struct dirent *image_dir = readdir(root);
size_t count = 0;
while (image_dir != nullptr) {
string file_name(image_dir->d_name);
if (file_name != "." && file_name != "..") {
const std::string file_path = image_path + "/" + file_name;
if (multi_input || config_param_.batch_count == 0) {
this->AddImage(file_path, input_i);
count++;
} else if (count < config_param_.batch_count) {
this->AddImage(file_path, input_i);
count++;
} else {
break;
}
}
image_dir = readdir(root);
}
std::sort(images_[input_i].begin(), images_[input_i].end());
if (config_param_.batch_count != 0 && config_param_.batch_count < images_[input_i].size()) {
images_[input_i].resize(config_param_.batch_count);
}
closedir(root);
input_i++;
}
return RET_OK;
return CollectCalibInputs(config_param_.image_paths, config_param_.batch_count, &images_);
}

STATUS Calibrator::ReadConfig() {
if (config_path_.empty() || config_path_.length() > PATH_MAX) {
MS_LOG(ERROR) << "invalid config path!";
return RET_PARAM_INVALID;
}
// check whether config file path is valid
char *resolved_path = new (std::nothrow) char[PATH_MAX]{0};
if (resolved_path == nullptr) {
MS_LOG(ERROR) << "New an object failed.";
return RET_ERROR;
}
#ifdef _WIN32
if (_fullpath(resolved_path, config_path_.c_str(), 1024) != nullptr) {
config_path_ = string(resolved_path);
}
#else
if (realpath(config_path_.c_str(), resolved_path) != nullptr) {
config_path_ = string(resolved_path);
}
#endif
std::ifstream fs(config_path_.c_str(), std::ifstream::in);
if (!fs.is_open()) {
MS_LOG(ERROR) << "config proto file %s open failed: " << config_path_;
delete[] resolved_path;
return RET_PARAM_INVALID;
}
std::string line;
while (std::getline(fs, line)) {
auto index = line.find('=');
if (index == std::string::npos) {
MS_LOG(ERROR) << "the config file is invalid, can not find '=', please check";
delete[] resolved_path;
return RET_PARAM_INVALID;
}
auto key = line.substr(0, index);
auto value = line.substr(index + 1);
Trim(&key);
Trim(&value);
if (key == "image_path") {
auto &raw_image_paths = value;
auto ind = raw_image_paths.find(',');
while (ind != std::string::npos) {
auto image_path = raw_image_paths.substr(0, ind);
Trim(&image_path);
config_param_.image_paths.push_back(image_path);
raw_image_paths = raw_image_paths.substr(ind + 1);
Trim(&raw_image_paths);
ind = raw_image_paths.find(',');
}
config_param_.image_paths.push_back(raw_image_paths);
} else if (key == "batch_count") {
config_param_.batch_count = std::stoul(value);
} else if (key == "thread_num") {
config_param_.thread_num = std::stoul(value);
} else if (key == "method_x") {
if (value != kMethodKL && value != kMethodMaxMin && value != kMethodOutlier) {
MS_LOG(WARNING) << "unsupported method_x: " << value << ". Use default value.";
} else {
config_param_.method_x = value;
}
} else if (key == "bias_correction") {
std::for_each(value.begin(), value.end(), ::tolower);
if (value == "true") {
config_param_.bias_correction = true;
}
} else {
MS_LOG(WARNING) << "unsupported parameter";
}
}

for (const auto &path : config_param_.image_paths) {
MS_LOG(DEBUG) << "calibration data_path: " << path;
}
MS_LOG(DEBUG) << "batch_count: " << config_param_.batch_count << " "
<< "method_x: " << config_param_.method_x << " "
<< "thread_num: " << config_param_.thread_num << " "
<< "bias_correction: " << config_param_.bias_correction;

delete[] resolved_path;
fs.close();
return RET_OK;
}
// Populates config_param_ by parsing the key=value calibration config file
// located at config_path_; delegates the actual parsing to ParseConfigFile.
STATUS Calibrator::ReadConfig() {
  return ParseConfigFile(config_path_, &config_param_);
}

// Calibrator: stores the calibration config file path and the quantization
// range parameters (bit width and integer min/max) used when computing
// activation thresholds.
Calibrator::Calibrator(string path, size_t bit_num, int quant_max, int quant_min)
    : config_path_(std::move(path)), bit_num_(bit_num), quant_max_(quant_max), quant_min_(quant_min) {}
@@ -621,8 +456,8 @@ STATUS PostTrainingQuantizer::DoQuantOutput(double scale, int zeropoint, struct
return RET_OK;
}

STATUS PostTrainingQuantizer::DoWeightQuant(const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c,
bool perchanel) const {
STATUS PostTrainingQuantizer::DoWeightQuant(const std::string &op_name, const AnfNodePtr &weight,
std::shared_ptr<PrimitiveC> primitive_c, bool perchanel) const {
MS_ASSERT(weight != nullptr);
MS_ASSERT(lite_primitive != nullptr);
// perlayer
@@ -640,8 +475,21 @@ STATUS PostTrainingQuantizer::DoWeightQuant(const AnfNodePtr &weight, std::share
MS_LOG(ERROR) << weight->fullname_with_scope() << " can not get value";
return RET_NULL_PTR;
}
auto status = QuantFilter<int8_t>(paramValue, std::move(primitive_c), QuantType_PostTraining, quant_max, quant_min,
bit_num, perchanel);
auto bit_num_t = bit_num;
auto quant_max_t = quant_max;
auto quant_min_t = quant_min;
if (calibrator_->config_param_.mixed) {
auto opname_iter = opname_bit_.find(op_name);
if (opname_iter == opname_bit_.end()) {
MS_LOG(WARNING) << op_name << " not in the opname_bit_ map";
} else {
bit_num_t = opname_iter->second;
quant_max_t = (1 << (unsigned int)(bit_num_t - 1)) - 1;
quant_min_t = -(1 << (unsigned int)(bit_num_t - 1));
}
}
auto status = QuantFilter<int8_t>(paramValue, std::move(primitive_c), QuantType_PostTraining, quant_max_t,
quant_min_t, bit_num_t, perchanel);
if (status != RET_OK) {
MS_LOG(ERROR) << "QuantFilter failed: " << status;
return status;
@@ -921,7 +769,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
}
if (abstractTensor->element()->GetTypeTrack()->type_id() == kNumberTypeFloat32) {
MS_LOG(DEBUG) << "this parameter do quant";
DoWeightQuant(input_node, primitive_c, false);
DoWeightQuant(op_name, input_node, primitive_c, false);
} else {
MS_LOG(DEBUG) << "this parameter no need to do quant";
}
@@ -943,7 +791,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
op_type == PrimitiveType_FullConnection) {
perchannel = true;
}
DoWeightQuant(weight, primitive_c, perchannel);
DoWeightQuant(op_name, weight, primitive_c, perchannel);
// do bias quant
if (cnode->inputs().size() == 4) {
auto bias = cnode->input(3);
@@ -982,18 +830,8 @@ STATUS PostTrainingQuantizer::UpdateDivergInverval() {
* 3. save quantied node
**/
STATUS PostTrainingQuantizer::PreProcess() {
if (this->calibrator_ == nullptr) {
MS_LOG(ERROR) << "calibrator is null!";
return RET_ERROR;
}
// 1. generate config param
STATUS status = calibrator_->ReadConfig();
if (status != RET_OK) {
MS_LOG(ERROR) << "read proto text failed!";
return status;
}
// 2. collect image files
status = calibrator_->CollectImages();
auto status = calibrator_->CollectImages();
if (status != RET_OK) {
MS_LOG(ERROR) << "collect images failed!";
return status;
@@ -1560,55 +1398,49 @@ STATUS PostTrainingQuantizer::ComputeThreshold() { return this->calibrator_->Com

STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
MS_LOG(INFO) << "start to parse config file";
STATUS status = PreProcess();
if (this->calibrator_ == nullptr) {
MS_LOG(ERROR) << "calibrator is null!";
return RET_ERROR;
}
// 1. generate config param
STATUS status = calibrator_->ReadConfig();
if (status != RET_OK) {
MS_LOG(ERROR) << "do pre process failed!";
MS_LOG(ERROR) << "read proto text failed!";
return status;
}
// anf -- fb
auto meta_graph = Export(func_graph, true, true);
if (meta_graph == nullptr) {
MS_LOG(ERROR) << "Export to meta_graph return nullptr";
return RET_ERROR;

if (calibrator_->config_param_.mixed) {
// get opname_bit map
auto weight_quant_func_graph = CopyFuncGraph(func_graph);
if (weight_quant_func_graph == nullptr) {
MS_LOG(ERROR) << "CopyFuncGraph error";
return RET_ERROR;
}
WeightQuantizer weight_quantizer(weight_quant_func_graph, calibrator_->config_param_);
weight_quantizer.flags = flags;
status = weight_quantizer.DoQuantize(weight_quant_func_graph);
if (status != RET_OK) {
MS_LOG(ERROR) << "Do mix weight quant error";
return RET_ERROR;
}
opname_bit_ = weight_quantizer.opname_bit_;
}

// transform
GraphDefTransform transform;
transform.SetGraphDef(meta_graph);
flags.quantType = schema::QuantType_QUANT_NONE;
status = transform.Transform(flags);
status = PreProcess();
if (status != RET_OK) {
MS_LOG(ERROR) << "FBTransform model failed " << status;
return RET_ERROR;
}
MS_LOG(INFO) << "start create session";
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = schema::MetaGraph::Pack(builder, meta_graph);
builder.Finish(offset);
schema::FinishMetaGraphBuffer(builder, offset);
size_t size = builder.GetSize();
auto *content = reinterpret_cast<const char *>(builder.GetBufferPointer());
if (content == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer nullptr";
return RET_ERROR;
MS_LOG(ERROR) << "do pre process failed!";
return status;
}
auto model = lite::Model::Import(content, size);

Context ctx;
ctx.thread_num_ = calibrator_->GetThreadNum();
fp32_session_ = dynamic_cast<mindspore::lite::LiteSession *>(session::LiteSession::CreateSession(&ctx));
// anf -- fb
flags.quantType = schema::QuantType_QUANT_NONE;
MS_LOG(INFO) << "start create session";
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
if (fp32_session_ == nullptr) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}

auto ret = fp32_session_->CompileGraph(model);
if (ret != lite::RET_OK) {
MS_LOG(ERROR) << "compile graph error";
return RET_ERROR;
}

MS_LOG(INFO) << "start to update divergence's max value";
status = DoInference();
if (status != RET_OK) {
@@ -1647,49 +1479,13 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {

if (calibrator_->GetBiasCorrection()) {
// init in8 session
// anf -- fb
auto int8_meta_graph = Export(func_graph, true, true);
if (int8_meta_graph == nullptr) {
MS_LOG(ERROR) << "Export to int8_meta_graph return nullptr";
return RET_ERROR;
}

// transform
GraphDefTransform fb_transform;
fb_transform.SetGraphDef(int8_meta_graph);
MS_LOG(INFO) << "create quant session";
flags.quantType = schema::QuantType_PostTraining;
status = fb_transform.Transform(flags);
if (status != RET_OK) {
MS_LOG(ERROR) << "FBTransform model failed " << status;
return RET_ERROR;
}
MS_LOG(INFO) << "start create quantized session";
flatbuffers::FlatBufferBuilder int8_builder(1024);
auto int8_offset = schema::MetaGraph::Pack(int8_builder, int8_meta_graph);
int8_builder.Finish(int8_offset);
schema::FinishMetaGraphBuffer(int8_builder, int8_offset);
size = int8_builder.GetSize();
auto *int8_content = reinterpret_cast<const char *>(int8_builder.GetBufferPointer());
if (int8_content == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer nullptr";
return RET_ERROR;
}
auto int8_model = lite::Model::Import(int8_content, size);

Context int8_ctx;
int8_ctx.thread_num_ = calibrator_->GetThreadNum();
int8_ctx.device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;

int8_session_ = dynamic_cast<mindspore::lite::LiteSession *>(session::LiteSession::CreateSession(&int8_ctx));
int8_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
if (int8_session_ == nullptr) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
ret = int8_session_->CompileGraph(int8_model);
if (ret != lite::RET_OK) {
MS_LOG(ERROR) << "compile graph error";
return RET_ERROR;
}

MS_LOG(INFO) << "do bias correction";
status = BiasCorrection(func_graph);


+ 10
- 18
mindspore/lite/tools/converter/quantizer/post_training_quantizer.h View File

@@ -28,6 +28,8 @@
#include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/converter.h"
#include "include/ms_tensor.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/converter/quantizer/weight_quantizer.h"

namespace mindspore::lite::quant {
class Calibrator;
@@ -38,19 +40,8 @@ struct MaxMin {
float max;
};

const char kMethodMaxMin[] = "MAX_MIN";
const char kMethodKL[] = "KL";
const char kMethodOutlier[] = "RemovalOutlier";
constexpr int kDefaultBinNumber = 2048;

struct ConfigParam {
std::vector<std::string> image_paths;
uint32_t batch_count{100};
std::string method_x{kMethodKL};
uint32_t thread_num{1};
bool bias_correction{false};
};

class PostTrainingQuantizer : public Quantizer {
public:
PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8,
@@ -64,14 +55,16 @@ class PostTrainingQuantizer : public Quantizer {
int quant_min{INT8_MIN};

private:
std::map<std::string, int> opname_bit_;

bool per_channel_{true};

TypeId target_type_{kNumberTypeInt8};

std::unique_ptr<Calibrator> calibrator_;

mindspore::lite::LiteSession *fp32_session_;
mindspore::lite::LiteSession *int8_session_;
session::LiteSession *fp32_session_{nullptr};
session::LiteSession *int8_session_{nullptr};

std::map<std::string, std::vector<float>> fp32_op_input_map; // concurency
std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurency
@@ -112,7 +105,8 @@ class PostTrainingQuantizer : public Quantizer {
STATUS DoQuantOutput(double scale, int32_t zeropoint, struct MaxMin *max_min,
const std::shared_ptr<PrimitiveC> &) const;

STATUS DoWeightQuant(const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c, bool perchannel) const;
STATUS DoWeightQuant(const std::string &op_name, const AnfNodePtr &weight, std::shared_ptr<PrimitiveC> primitive_c,
bool perchannel) const;

STATUS DoBiasQuant(const AnfNodePtr &bias, const std::shared_ptr<PrimitiveC> &primitive_c);
STATUS Int8Inference();
@@ -213,13 +207,13 @@ class Calibrator {

std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> *GetOutputDivergInfo();

PostQuantConfig config_param_;

private:
std::vector<std::vector<std::string>> images_; // multi_input, echo input has multi input data

std::string config_path_;

ConfigParam config_param_;

std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> inputs_diverg_info_;

std::unordered_map<std::string, std::vector<std::unique_ptr<DivergInfo>>> outputs_diverg_info_;
@@ -227,8 +221,6 @@ class Calibrator {
size_t bit_num_;
int quant_max_;
int quant_min_;

void AddImage(const std::string &file, size_t index);
};
} // namespace mindspore::lite::quant
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H

+ 309
- 2
mindspore/lite/tools/converter/quantizer/quantize_util.cc View File

@@ -17,6 +17,8 @@
#include "mindspore/lite/tools/converter/quantizer/quantize_util.h"
#include <cmath>
#include <string>
#include <map>
#include <fstream>
#include <algorithm>
#include <memory>
#include <vector>
@@ -26,6 +28,8 @@
#include "src/common/utils.h"
#include "abstract/abstract_value.h"
#include "securec/include/securec.h"
#include "tools/anf_exporter/anf_exporter.h"
#include "mindspore/lite/include/version.h"

using std::string;
using std::vector;
@@ -83,10 +87,10 @@ bool QuantStrategy::CanConvOpQuantized(const CNodePtr &node) const {

bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
MS_ASSERT(node != nullptr);
if (!node->isa<CNode>()) {
if (!node->isa<mindspore::CNode>()) {
return false;
}
auto cnode = std::dynamic_pointer_cast<CNode>(node);
auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node);
auto type = NodePrimitiveType(cnode);
static const std::vector<schema::PrimitiveType> int8OpList = {
schema::PrimitiveType_Conv2D,
@@ -475,4 +479,307 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode) {
}
return (schema::PrimitiveType)primitive_c->Type();
}

// Parses a post-training-quantization config file of `key=value` lines into
// *post_quant_config.
// Recognized keys:
//   image_path           comma-separated list of calibration data directories
//   batch_count          number of calibration inputs to use (0 = use all)
//   thread_num           inference thread count
//   method_x             MAX_MIN | KL | RemovalOutlier (others keep default)
//   bias_correction      "true" (case-insensitive) enables bias correction
//   mixed                "true" (case-insensitive) enables mixed-precision quant
//   mean_error_threshold threshold for mixed-precision bit selection
// Unknown keys only log a warning. Sets post_quant_config->inited on success.
// Returns RET_OK on success, RET_PARAM_INVALID for a bad path or a line
// without '='.
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config) {
  if (post_quant_config == nullptr) {
    MS_LOG(ERROR) << "post_quant_config is null.";
    return RET_PARAM_INVALID;
  }

  if (config_file.empty() || config_file.length() > PATH_MAX) {
    MS_LOG(ERROR) << "invalid config path!";
    return RET_PARAM_INVALID;
  }
  // Canonicalize the path (relative components, symlinks) before opening it.
  auto resolved_path = std::make_unique<char[]>(PATH_MAX);
  if (resolved_path == nullptr) {
    MS_LOG(ERROR) << "New an object failed.";
    return RET_ERROR;
  }
#ifdef _WIN32
  if (_fullpath(resolved_path.get(), config_file.c_str(), 1024) != nullptr) {
    config_file = string(resolved_path.get());
  }
#else
  if (realpath(config_file.c_str(), resolved_path.get()) != nullptr) {
    config_file = string(resolved_path.get());
  }
#endif
  std::ifstream fs(config_file.c_str(), std::ifstream::in);
  if (!fs.is_open()) {
    MS_LOG(ERROR) << "config file open failed: " << config_file;
    return RET_PARAM_INVALID;
  }
  std::string line;
  while (std::getline(fs, line)) {
    Trim(&line);
    if (line.empty()) {
      continue;
    }
    auto index = line.find('=');
    if (index == std::string::npos) {
      MS_LOG(ERROR) << "the config file is invalid, can not find '=', please check";
      return RET_PARAM_INVALID;
    }
    auto key = line.substr(0, index);
    auto value = line.substr(index + 1);
    Trim(&key);
    Trim(&value);
    if (key == "image_path") {
      // Split the comma-separated directory list, trimming each entry.
      auto &raw_image_paths = value;
      auto ind = raw_image_paths.find(',');
      while (ind != std::string::npos) {
        auto image_path = raw_image_paths.substr(0, ind);
        Trim(&image_path);
        post_quant_config->image_paths.push_back(image_path);
        raw_image_paths = raw_image_paths.substr(ind + 1);
        Trim(&raw_image_paths);
        ind = raw_image_paths.find(',');
      }
      post_quant_config->image_paths.push_back(raw_image_paths);
    } else if (key == "batch_count") {
      post_quant_config->batch_count = std::stoul(value);
    } else if (key == "thread_num") {
      post_quant_config->thread_num = std::stoul(value);
    } else if (key == "method_x") {
      if (value != kMethodKL && value != kMethodMaxMin && value != kMethodOutlier) {
        MS_LOG(WARNING) << "unsupported method_x: " << value << ". Use default value.";
      } else {
        post_quant_config->method_x = value;
      }
    } else if (key == "bias_correction") {
      // fix: std::for_each(value.begin(), value.end(), ::tolower) discarded the
      // converted characters and left `value` unchanged, so mixed-case values
      // such as "True" were silently ignored. std::transform writes them back.
      std::transform(value.begin(), value.end(), value.begin(), ::tolower);
      if (value == "true") {
        post_quant_config->bias_correction = true;
      }
    } else if (key == "mixed") {
      // Same fix as bias_correction: lower-case in place before comparing.
      std::transform(value.begin(), value.end(), value.begin(), ::tolower);
      if (value == "true") {
        post_quant_config->mixed = true;
      }
    } else if (key == "mean_error_threshold") {
      post_quant_config->mean_error_threshold = std::stof(value);
    } else {
      MS_LOG(WARNING) << "unsupported parameter: " << key;
    }
  }

  for (const auto &path : post_quant_config->image_paths) {
    MS_LOG(DEBUG) << "calibration data_path: " << path;
  }
  MS_LOG(DEBUG) << "batch_count: " << post_quant_config->batch_count << "\n"
                << "method_x: " << post_quant_config->method_x << "\n"
                << "thread_num: " << post_quant_config->thread_num << "\n"
                << "bias_correction: " << post_quant_config->bias_correction << "\n"
                << "mixed: " << post_quant_config->mixed << "\n"
                << "mean_error_threshold: " << post_quant_config->mean_error_threshold;
  post_quant_config->inited = true;
  fs.close();
  return RET_OK;
}

// Exports func_graph to a flatbuffer model, creates a LiteSession with
// thread_num inference threads, and compiles the graph into it.
// Returns the ready-to-run session (owned by the caller) or nullptr on any
// failure. NOTE(review): ownership of `meta_graph` after SetGraphDef and of
// `model` after a successful CompileGraph follows the converter's existing
// conventions — confirm against GraphDefTransform/LiteSession docs before
// adding further cleanup.
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
                                               int thread_num) {
  auto meta_graph = Export(func_graph, true, true);
  if (meta_graph == nullptr) {
    MS_LOG(ERROR) << "Export to meta_graph failed";
    return nullptr;
  }

  // Run the legacy graph passes (fusion, format, ...) on the exported graph.
  GraphDefTransform fb_transform;
  fb_transform.SetGraphDef(meta_graph);
  auto status = fb_transform.Transform(flags);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "FBTransform model failed";
    return nullptr;
  }
  meta_graph->version = Version();

  // Serialize the transformed graph into a flatbuffer and import it as a model.
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = schema::MetaGraph::Pack(builder, meta_graph);
  builder.Finish(offset);
  schema::FinishMetaGraphBuffer(builder, offset);
  auto size = builder.GetSize();
  auto *content = reinterpret_cast<const char *>(builder.GetBufferPointer());
  if (content == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return null";
    return nullptr;
  }
  auto model = lite::Model::Import(content, size);
  if (model == nullptr) {
    MS_LOG(ERROR) << "Import model failed";
    return nullptr;
  }

  Context ctx;
  ctx.thread_num_ = thread_num;

  auto session = session::LiteSession::CreateSession(&ctx);
  if (session == nullptr) {
    MS_LOG(ERROR) << "create session failed.";
    delete model;  // fix: model leaked when session creation failed
    return nullptr;
  }

  status = session->CompileGraph(model);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "CompileGraph error";
    // fix: both the session and the imported model leaked on this path.
    delete session;
    delete model;
    return nullptr;
  }
  // Release the serialized buffer copy held by the model; the compiled session
  // no longer needs it.
  model->Free();
  return session;
}

// Scans each directory in input_dirs and collects the (sorted) file paths of
// calibration inputs into (*inputs)[i] for input i.
// count_limited caps the number of files taken per input; 0 means "no limit".
// Returns RET_OK on success, RET_PARAM_INVALID for an unreadable directory,
// RET_ERROR when inputs is null.
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
                          std::vector<std::vector<std::string>> *inputs) {
  if (inputs == nullptr) {
    MS_LOG(ERROR) << "inputs is null";
    return RET_ERROR;
  }
  // Append `file` to the input_index-th list if it exists on disk.
  auto AddImage = [&inputs](const std::string &file, size_t index) {
    if (index >= inputs->size()) {
      MS_LOG(ERROR) << "images_ size: " << inputs->size() << " but input index: " << index;
      return;
    }
    struct stat buf {};
    if (stat(file.c_str(), &buf) == 0) {
      inputs->at(index).push_back(file);
    } else {
      MS_LOG(WARNING) << "invalid image file path: " << file;
    }
  };

  inputs->resize(input_dirs.size());
  auto input_i = 0;
  bool multi_input = input_dirs.size() > 1;
  for (const auto &image_path : input_dirs) {
    DIR *root = opendir(image_path.c_str());
    if (root == nullptr) {
      MS_LOG(ERROR) << "invalid image path: " << image_path;
      return RET_PARAM_INVALID;
    }
    struct dirent *image_dir = readdir(root);
    size_t count = 0;
    while (image_dir != nullptr) {
      string file_name(image_dir->d_name);
      if (file_name != "." && file_name != "..") {
        const std::string file_path = image_path + "/" + file_name;
        // fix: the condition was `multi_input || count == 0`, which collected
        // only ONE file when count_limited == 0 for a single input. The
        // original Calibrator semantics are: limit of 0 means "take all".
        if (multi_input || count_limited == 0) {
          AddImage(file_path, input_i);
          count++;
        } else if (count < count_limited) {
          AddImage(file_path, input_i);
          count++;
        } else {
          break;
        }
      }
      image_dir = readdir(root);
    }
    // readdir order is unspecified: sort for deterministic calibration runs,
    // then apply the per-input cap (multi-input dirs were collected fully).
    std::sort(inputs->at(input_i).begin(), inputs->at(input_i).end());
    if (count_limited != 0 && count_limited < inputs->at(input_i).size()) {
      inputs->at(input_i).resize(count_limited);
    }
    closedir(root);
    input_i++;
  }
  return RET_OK;
}

// Reads the calibration file images[input_index][image_index] and copies its
// raw bytes into `tensor`'s data buffer. The file size must match the tensor
// size exactly. Returns RET_OK on success; RET_ERROR for bad indices or size
// mismatch; RET_NULL_PTR when the file or the tensor buffer is unavailable.
STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
                             const std::vector<std::vector<std::string>> &images, mindspore::tensor::MSTensor *tensor) {
  MS_ASSERT(tensor != nullptr);
  if (input_index >= images.size()) {
    MS_LOG(ERROR) << "images_ size: " << images.size() << " but input_index: " << input_index;
    return RET_ERROR;
  }
  if (image_index >= images[input_index].size()) {
    MS_LOG(ERROR) << "images_[input_index] size: " << images[input_index].size() << " but image_index: " << image_index;
    return RET_ERROR;
  }
  string path = images[input_index][image_index];
  MS_LOG(INFO) << "read image: " << path;
  size_t size;
  char *bin_buf = ReadFile(path.c_str(), &size);
  if (bin_buf == nullptr) {
    MS_LOG(ERROR) << "ReadFile return nullptr";
    return RET_NULL_PTR;
  }
  auto data = tensor->MutableData();
  if (data == nullptr) {
    MS_LOG(ERROR) << "Get tensor MutableData return nullptr";
    delete[] bin_buf;  // fix: buffer leaked on this error path
    return RET_NULL_PTR;
  }
  if (size != tensor->Size()) {
    MS_LOG(ERROR) << "the input data is not consistent with model input, file_size: " << size
                  << " input tensor size: " << tensor->Size();
    delete[] bin_buf;  // fix: buffer leaked on this error path
    return RET_ERROR;
  }
  auto ret = memcpy_s(data, tensor->Size(), bin_buf, size);
  if (ret != EOK) {
    MS_LOG(ERROR) << "memcpy_s error: " << ret;
    delete[] bin_buf;
    return RET_ERROR;
  }
  delete[] bin_buf;
  return RET_OK;
}

// Deep-copies a FuncGraph for experimentation (e.g. trial weight quantization)
// without disturbing the original: clones the graph structure, then gives every
// parameter's default ParamValueLite a freshly malloc'ed copy of its tensor
// data and a fresh AbstractTensor, so mutating the copy's weights cannot touch
// the original graph. Returns nullptr on any failure.
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &func_graph) {
  // Clone nodes/edges; parameter default values are still shared at this point.
  Cloner cloner({func_graph}, true, true, true, std::make_shared<TraceCopy>(), nullptr);
  auto new_func_graph = cloner[func_graph];

  // Map original cnodes by their fully-scoped name so each cloned cnode can be
  // matched back to its source.
  std::map<std::string, CNodePtr> old_cnode_map;
  for (const auto &cnode : func_graph->GetOrderedCnodes()) {
    old_cnode_map[cnode->fullname_with_scope()] = cnode;
  }

  for (auto &cnode : new_func_graph->GetOrderedCnodes()) {
    auto cnode_name = cnode->fullname_with_scope();
    auto old_cnode_iter = old_cnode_map.find(cnode_name);
    if (old_cnode_iter == old_cnode_map.end()) {
      MS_LOG(ERROR) << "can not find node: " << cnode_name;
      return nullptr;
    }
    auto old_cnode = old_cnode_iter->second;
    auto inputs = cnode->inputs();
    for (size_t i = 0; i < inputs.size(); i++) {
      auto input_node = inputs[i];
      if (input_node->isa<Parameter>()) {
        auto param_node = input_node->cast<ParameterPtr>();
        if (param_node->has_default()) {
          // NOTE(review): assumes default_param() is always a ParamValueLite
          // here (static_pointer_cast is unchecked) — confirm with callers.
          ParamValueLitePtr old_param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param());
          auto new_param_value = std::make_shared<ParamValueLite>();

          // Duplicate the raw tensor bytes so the copy owns its own weights.
          auto copyed_data = malloc(old_param_value->tensor_size());
          if (copyed_data == nullptr) {
            MS_LOG(ERROR) << "malloc data error, size: " << old_param_value->tensor_size();
            return nullptr;
          }
          // NOTE(review): memcpy result unchecked (other code here uses
          // memcpy_s); also assumes tensor_addr() is non-null when
          // tensor_size() > 0 — verify.
          memcpy(copyed_data, old_param_value->tensor_addr(), old_param_value->tensor_size());

          new_param_value->set_tensor_size(old_param_value->tensor_size());
          // Ownership of copyed_data passes to new_param_value here —
          // presumably released by ParamValueLite's destructor; TODO confirm.
          new_param_value->set_tensor_addr(copyed_data);
          new_param_value->set_tensor_shape(old_param_value->tensor_shape());
          new_param_value->set_format(old_param_value->format());
          new_param_value->set_tensor_type(old_param_value->tensor_type());

          param_node->set_default_param(new_param_value);
        }

        // Rebuild the abstract so the copy does not share the original's
        // AbstractTensor object either.
        auto old_abstract_base = param_node->abstract();
        if (!utils::isa<abstract::AbstractTensorPtr>(old_abstract_base)) {
          MS_LOG(ERROR) << "Abstract of parameter should be abstract tensor, " << param_node->name();
          return nullptr;
        }
        auto old_abstract = utils::cast<abstract::AbstractTensorPtr>(old_abstract_base);
        auto new_abstract = std::make_shared<abstract::AbstractTensor>(old_abstract->element()->GetTypeTrack(),
                                                                       old_abstract->GetShapeTrack());
        param_node->set_abstract(new_abstract);
      }
    }  // end inputs loop
  }    // end cnodes loop
  return new_func_graph;
}

} // namespace mindspore::lite::quant

+ 35
- 0
mindspore/lite/tools/converter/quantizer/quantize_util.h View File

@@ -17,6 +17,8 @@
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H

#include <dirent.h>
#include <sys/stat.h>
#include <memory>
#include <string>
#include <cmath>
@@ -35,11 +37,29 @@
#include "ir/primitive.h"
#include "abstract/dshape.h"
#include "tools/converter/quantizer/bitpacking.h"
#include "src/lite_session.h"
#include "tools/converter/graphdef_transform.h"
#include "src/common/file_utils.h"

namespace mindspore::lite::quant {
static constexpr size_t UINT8_QUANTIZATION = 8;
static constexpr size_t WEIGHT_INDEX = 1;

const char kMethodMaxMin[] = "MAX_MIN";
const char kMethodKL[] = "KL";
const char kMethodOutlier[] = "RemovalOutlier";

struct PostQuantConfig {
std::vector<std::string> image_paths;
uint32_t batch_count{100};
std::string method_x{kMethodKL};
uint32_t thread_num{1};
bool bias_correction{false};
bool mixed{false};
float mean_error_threshold{0.04};
bool inited{false};
};

/**
* 1. when op's weight size > mWeightSize just skip
* 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization
@@ -320,6 +340,21 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
return RET_OK;
}

// utils

schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode);

STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config);

session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num);

STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
std::vector<std::vector<std::string>> *inputs);

STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
const std::vector<std::vector<std::string>> &images, mindspore::tensor::MSTensor *tensor);

FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);
} // namespace mindspore::lite::quant
#endif

+ 294
- 38
mindspore/lite/tools/converter/quantizer/weight_quantizer.cc View File

@@ -18,6 +18,7 @@
#include <list>
#include <string>
#include <vector>
#include <unordered_map>
#include "src/common/common.h"
#include "ir/dtype/type_id.h"

@@ -36,6 +37,7 @@ bool WeightQuantizer::IsPosNum(const std::string &str) {
}
return true;
}

STATUS WeightQuantizer::WeightQuantInputCheck(const converter::Flags *config) {
MS_ASSERT(config != nullptr);
if (!WeightQuantizer::IsPosNum(config->quantWeightChannel)) {
@@ -57,28 +59,57 @@ STATUS WeightQuantizer::WeightQuantInputCheck(const converter::Flags *config) {
}
return RET_OK;
}
WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const string &weightSize,

WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const PostQuantConfig &config) : Quantizer(graph) {
quant_strategy_ = std::make_unique<QuantStrategy>(0, 0);
config_param_ = config;
}

WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const std::string &config_file, const string &weightSize,
const std::string &convWeightChannelThreshold, const std::string &bitNum)
: Quantizer(graph) {
this->config_file_ = config_file;
auto quantSize = static_cast<size_t>(std::stoull(weightSize));
this->bitNum = static_cast<size_t>(std::stoull(bitNum));
this->bit_num_ = static_cast<size_t>(std::stoull(bitNum));
auto convQuantWeightChannelThreshold = static_cast<size_t>(std::stoull(convWeightChannelThreshold));
mStrategy = std::make_unique<QuantStrategy>(quantSize, convQuantWeightChannelThreshold);
quant_max = (1 << (unsigned int)(this->bitNum - 1)) - 1;
quant_min = -(1 << (unsigned int)(this->bitNum - 1));
quant_strategy_ = std::make_unique<QuantStrategy>(quantSize, convQuantWeightChannelThreshold);
quant_max = (1 << (unsigned int)(this->bit_num_ - 1)) - 1;
quant_min = -(1 << (unsigned int)(this->bit_num_ - 1));
// parse type_id
if (this->bitNum > 0 && this->bitNum <= 8) {
if (this->bit_num_ > 0 && this->bit_num_ <= 8) {
type_id = kNumberTypeInt8;
} else if (this->bitNum <= 16) {
} else if (this->bit_num_ <= 16) {
type_id = kNumberTypeInt16;
} else {
MS_LOG(ERROR) << "invalid input bits";
}
}

WeightQuantizer::~WeightQuantizer() { delete fp32_session_; }

STATUS WeightQuantizer::SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node,
std::shared_ptr<PrimitiveC> primitive_c) {
// set dtype
param_value->set_tensor_type(type_id);
auto abstract_base = param_node->abstract();
if (abstract_base == nullptr) {
MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name();
return RET_ERROR;
}
if (!utils::isa<abstract::AbstractTensorPtr>(abstract_base)) {
MS_LOG(ERROR) << "Abstract of parameter should be anstract tensor, " << param_node->name();
return RET_ERROR;
}
auto abstract_tensor = utils::cast<abstract::AbstractTensorPtr>(abstract_base);
abstract_tensor->element()->set_type(TypeIdToType(type_id));
primitive_c->set_quant_type(schema::QuantType_WeightQuant);

return RET_OK;
}

STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
for (auto &cnode : nodes) {
if (!mStrategy->CanConvOpQuantized(cnode)) {
if (!quant_strategy_->CanConvOpQuantized(cnode)) {
continue;
}

@@ -108,36 +139,28 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
}
auto status = RET_ERROR;
if (type_id == kNumberTypeInt8) {
status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bitNum, true);
status =
QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num_, true);
} else if (type_id == kNumberTypeInt16) {
status =
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bitNum, true);
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num_, true);
}
if (status != RET_OK) {
MS_LOG(ERROR) << "QuantFilter failed : " << status;
return status;
}
// set dtype
param_value->set_tensor_type(type_id);
auto abstractBase = param_node->abstract();
if (abstractBase == nullptr) {
MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name();
return RET_ERROR;
}
if (!utils::isa<abstract::AbstractTensorPtr>(abstractBase)) {
MS_LOG(ERROR) << "Abstract of parameter should be anstract tensor, " << param_node->name();
status = SetAbstract(param_value, param_node, primitive_c);
if (status != RET_OK) {
MS_LOG(ERROR) << "SetAbstract failed : " << status;
return RET_ERROR;
}
auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
abstractTensor->element()->set_type(TypeIdToType(type_id));
primitive_c->set_quant_type(schema::QuantType_WeightQuant);
}
return RET_OK;
}

STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {
for (auto &node : nodes) {
if (!mStrategy->CanMulOpQuantized(node)) {
if (!quant_strategy_->CanMulOpQuantized(node)) {
continue;
}
auto already_quant = false;
@@ -186,38 +209,271 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {

auto status = RET_ERROR;
if (type_id == kNumberTypeInt8) {
status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bitNum, true);
status =
QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num_, true);
} else if (type_id == kNumberTypeInt16) {
status =
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bitNum, true);
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num_, true);
}
if (status != RET_OK) {
MS_LOG(ERROR) << "QuantFilter failed : " << status;
return status;
}
param_value->set_tensor_type(type_id);
// set dtype
auto abstractBase = param_node->abstract();
if (abstractBase == nullptr) {
MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name();
status = SetAbstract(param_value, param_node, primitive_c);
if (status != RET_OK) {
MS_LOG(ERROR) << "SetAbstract failed : " << status;
return RET_ERROR;
}
}

return RET_OK;
}

constexpr float relative_tolerance = 1e-5;
constexpr float abs_tolerance = 1e-4;

template <typename T>
float CompareOutputData(const std::unordered_map<std::string, mindspore::tensor::MSTensor *> &expected_tensor,
const std::unordered_map<std::string, mindspore::tensor::MSTensor *> &compare_tensor) {
auto valid_data = [](T data) -> bool { return (!std::isnan(data) && !std::isinf(data)); };

float total_mean_error = 0.0f;
int tensor_cnt = expected_tensor.size();

if (tensor_cnt <= 0) {
MS_LOG(ERROR) << "unexpected tensor_cnt: " << tensor_cnt;
return RET_ERROR;
}

for (const auto &exp_tensor_pair : expected_tensor) {
float mean_error = 0.0f;
int error_cnt = 0;

auto exp_tensor_name = exp_tensor_pair.first;
auto exp_tensor = exp_tensor_pair.second;
auto cmp_tensor_find_iter = compare_tensor.find(exp_tensor_name);
if (cmp_tensor_find_iter == compare_tensor.end()) {
MS_LOG(ERROR) << "can not find: " << exp_tensor_name;
return RET_ERROR;
}
if (!utils::isa<abstract::AbstractTensorPtr>(abstractBase)) {
MS_LOG(ERROR) << "Abstract of parameter should be anstract tensor, " << param_node->name();
auto cmp_tensor = cmp_tensor_find_iter->second;

auto exp_tensor_shape = exp_tensor->shape();
auto cmp_tensor_shape = cmp_tensor->shape();
if (exp_tensor_shape != cmp_tensor_shape) {
MS_LOG(ERROR) << "exp tensor shape not equal to cmp. exp_tensor_elem_cnt: " << exp_tensor->ElementsNum()
<< " cmp_tensor_elem_cnt: " << cmp_tensor->ElementsNum();
return RET_ERROR;
}
auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
abstractTensor->element()->set_type(TypeIdToType(type_id));
primitive_c->set_quant_type(schema::QuantType_WeightQuant);
auto exp_data = static_cast<T *>(exp_tensor->MutableData());
auto cmp_data = static_cast<T *>(cmp_tensor->MutableData());
auto elem_cnt = exp_tensor->ElementsNum();
for (int i = 0; i < elem_cnt; i++) {
if (!valid_data(exp_data[i]) || !valid_data(cmp_data[i])) {
MS_LOG(ERROR) << "data is not valid. exp: " << exp_data[i] << " cmp: " << cmp_data[i] << " index: " << i;
return RET_ERROR;
}
auto tolerance = abs_tolerance + relative_tolerance * fabs(exp_data[i]);
auto abs_error = std::fabs(exp_data[i] - cmp_data[i]);
if (abs_error > tolerance) {
if (fabs(exp_data[i] == 0)) {
if (abs_error > 1e-5) {
mean_error += abs_error;
error_cnt++;
} else {
// it is ok, very close to 0
continue;
}
} else {
mean_error += abs_error / (fabs(exp_data[i]) + FLT_MIN);
error_cnt++;
}
} else {
// it is ok, no error
continue;
}
} // end one tensor data loop
total_mean_error += mean_error / elem_cnt;
} // end tensor loop
return total_mean_error / tensor_cnt;
}

STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
// 0.1 Create Fp32 Session
flags.quantType = schema::QuantType_QUANT_NONE;
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
if (fp32_session_ == nullptr) {
MS_LOG(ERROR) << "CreateSessoin fail";
return RET_ERROR;
}
auto fp32_inputs = fp32_session_->GetInputs();
// 0.2 Parse input calib files
auto status = CollectCalibInputs(config_param_.image_paths, config_param_.batch_count, &images_);
if (status != RET_OK) {
MS_LOG(ERROR) << "CollectCalibInputs fail";
return RET_ERROR;
}

auto cnodes = func_graph->GetOrderedCnodes();
for (auto iter = cnodes.end(); iter != cnodes.begin();) {
auto cnode = *(--iter);
auto primitive_c = GetValueNode<std::shared_ptr<PrimitiveC>>(cnode->input(0));
if (primitive_c == nullptr) {
MS_LOG(ERROR) << "primitive_c is null.";
return RET_ERROR;
}
auto op_name = cnode->fullname_with_scope();
MS_LOG(DEBUG) << "process node: " << op_name
<< " type: " << schema::EnumNamePrimitiveType((schema::PrimitiveType)primitive_c->Type());
if (quant_strategy_->CanConvOpQuantized(cnode) || quant_strategy_->CanMulOpQuantized(cnode)) {
auto input_node = cnode->input(2);
if (!input_node->isa<Parameter>()) {
MS_LOG(WARNING) << op_name << " the second input is not parameter";
continue;
}
auto param_node = input_node->cast<ParameterPtr>();
if (!param_node->has_default()) {
MS_LOG(WARNING) << op_name << " the second input can not convert to parameter";
continue;
}
auto param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param());
if (param_value == nullptr) {
MS_LOG(WARNING) << op_name << " the second input can not convert to parameter";
continue;
}
if (param_value->tensor_type() != TypeId::kNumberTypeFloat32) {
MS_LOG(WARNING) << op_name << " the second input type is not float";
continue;
}
// copy origin data in case to recover
auto *raw_data = static_cast<float *>(param_value->tensor_addr());
auto elem_count = param_value->tensor_shape_size();
auto origin_data = malloc(sizeof(float) * elem_count);
auto ret = memcpy_s(origin_data, sizeof(float) * elem_count, raw_data, param_value->tensor_size());
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy fail: "
<< " dst size: " << sizeof(float) * elem_count << " src size: " << param_value->tensor_size();
return RET_ERROR;
}
// 1. try quant
for (int bit_num_t = 2; bit_num_t <= 8; bit_num_t++) {
type_id = TypeId::kNumberTypeInt8;
int quant_max_t = (1 << (unsigned int)(bit_num_t - 1)) - 1;
int quant_min_t = -(1 << (unsigned int)(bit_num_t - 1));

if (type_id == TypeId::kNumberTypeInt8) {
status = QuantFilter<int8_t>(param_value, primitive_c, QuantType::QuantType_WeightQuant, quant_max_t,
quant_min_t, bit_num_t, true);
} else if (type_id == TypeId::kNumberTypeInt16) {
status = QuantFilter<int16_t>(param_value, primitive_c, QuantType::QuantType_WeightQuant, quant_max_t,
quant_min_t, bit_num_t, true);
} else {
MS_LOG(ERROR) << "unexpected type_id: " << type_id;
return RET_ERROR;
}
if (status != RET_OK) {
MS_LOG(ERROR) << "quant filter fail.";
return RET_ERROR;
}
status = SetAbstract(param_value, param_node, primitive_c);
if (status != RET_OK) {
MS_LOG(ERROR) << "SetAbstract failed : " << status;
return RET_ERROR;
}
// 2. evaluate the quant
// 2.1 create quant session, get input, output tensor
flags.quantType = schema::QuantType_WeightQuant;
auto quant_session =
std::unique_ptr<session::LiteSession>(CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num));
if (quant_session == nullptr) {
MS_LOG(ERROR) << "create session error: " << status;
return RET_ERROR;
}
auto quant_inputs = quant_session->GetInputs();

auto mean_error = 0.0f;
if (fp32_inputs.size() != images_.size()) {
MS_LOG(ERROR) << "model's input tensor cnt: " << fp32_inputs.size() << " != " << images_.size();
return RET_ERROR;
}
auto image_cnt = images_.at(0).size();
for (size_t i = 0; i < image_cnt; i++) {
// set multi-input data
for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
return RET_ERROR;
}
status = CopyInputDataToTensor(input_index, i, images_, quant_inputs[input_index]);
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
return RET_ERROR;
}
}
std::future<STATUS> fp32_inference = std::async(
std::launch::async, [](session::LiteSession *fp32_session) -> STATUS { return fp32_session->RunGraph(); },
fp32_session_);

status = quant_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "quant session run error";
return RET_ERROR;
}
status = fp32_inference.get();
if (status != RET_OK) {
MS_LOG(ERROR) << "fp32 session run error";
return RET_ERROR;
}
// 3. compare betwen quant and fp32
auto fp32_outputs = fp32_session_->GetOutputs();
auto quant_outputs = quant_session->GetOutputs();
mean_error += CompareOutputData<float>(fp32_outputs, quant_outputs);
} // end_for: calib data loop
mean_error = mean_error / image_cnt;

if (mean_error <= config_param_.mean_error_threshold) {
MS_LOG(DEBUG) << "op: " << op_name << " got mixed bit: " << bit_num_t << " mean_error: " << mean_error;
opname_bit_[op_name] = bit_num_t;
break;
} else if (bit_num_t != 8) {
// recover
param_value->set_tensor_size(sizeof(float) * elem_count);
ret = memcpy_s(raw_data, param_value->tensor_size(), origin_data, sizeof(float) * elem_count);
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy fail: "
<< " src size: " << sizeof(float) * elem_count << " dst size: " << param_value->tensor_size();
return RET_ERROR;
}
} else {
MS_LOG(DEBUG) << "op: " << op_name << " set bit: " << bit_num_t << " mean_error: " << mean_error;
opname_bit_[op_name] = bit_num_t;
}
} // end bit loop
free(origin_data);
} // if: conv and matmul
} // end loop: all cnode
return RET_OK;
}

STATUS WeightQuantizer::DoQuantize(FuncGraphPtr funcGraph) {
MS_ASSERT(funcGraph != nullptr);
STATUS WeightQuantizer::DoQuantize(FuncGraphPtr func_graph) {
MS_ASSERT(func_graph != nullptr);
STATUS ret;
auto cnodes = funcGraph->GetOrderedCnodes();
auto cnodes = func_graph->GetOrderedCnodes();

if (!config_file_.empty()) {
ret = ParseConfigFile(config_file_, &config_param_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ReadConfig error.";
return RET_ERROR;
}
}

if (config_param_.mixed) {
MS_LOG(INFO) << "Do mixed bit quantization";
return DoMiexedQuant(func_graph);
}

ret = DoConvQuantize(cnodes);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoConvQuantize failed :" << ret;


+ 20
- 7
mindspore/lite/tools/converter/quantizer/weight_quantizer.h View File

@@ -17,9 +17,12 @@
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H

#include <future>
#include <memory>
#include <map>
#include <list>
#include <string>
#include <vector>
#include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "ir/func_graph.h"
@@ -27,27 +30,37 @@
#include "include/model.h"
#include "base/base.h"
#include "abstract/dshape.h"
#include "src/lite_session.h"

namespace mindspore::lite::quant {
class WeightQuantizer : public Quantizer {
public:
WeightQuantizer(FuncGraphPtr graph, const std::string &weightSize, const std::string &covWeightChannelThreshold,
const std::string &bitNum);
WeightQuantizer(FuncGraphPtr graph, const std::string &config_file, const std::string &weightSize,
const std::string &covWeightChannelThreshold, const std::string &bitNum);
WeightQuantizer(FuncGraphPtr graph, const PostQuantConfig &config);
~WeightQuantizer();

~WeightQuantizer() = default;

STATUS DoQuantize(FuncGraphPtr funcGraph) override;
STATUS DoQuantize(FuncGraphPtr func_graph) override;
STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);
static STATUS WeightQuantInputCheck(const converter::Flags *config);
static bool IsPosNum(const std::string &str);

int quant_max;
int quant_min;
TypeId type_id{kTypeUnknown};
std::map<std::string, int> opname_bit_;

private:
std::unique_ptr<QuantStrategy> mStrategy;
size_t bitNum;
std::unique_ptr<QuantStrategy> quant_strategy_;
size_t bit_num_;
std::string config_file_;
PostQuantConfig config_param_;
std::vector<std::vector<std::string>> images_; // multi_input, [[mode_input_0], [model_input_1]...]
session::LiteSession *fp32_session_ = nullptr;

STATUS DoMiexedQuant(FuncGraphPtr);
STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c);
};
} // namespace mindspore::lite::quant
#endif

Loading…
Cancel
Save