Browse Source

!30025 support dynamic quant gather

Merge pull request !30025 from yeyunpeng2020/dynamic_quant_success
feature/build-system-rewrite
i-robot Gitee 4 years ago
parent
commit
7389df06af
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
12 changed files with 446 additions and 47 deletions
  1. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/gather_infer.c
  2. +38
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.c
  3. +32
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.h
  4. +5
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/quantize.h
  5. +215
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.cc
  6. +53
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.h
  7. +35
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc
  8. +8
    -1
      mindspore/lite/src/train/train_export.cc
  9. +2
    -27
      mindspore/lite/src/weight_decoder.cc
  10. +41
    -5
      mindspore/lite/src/weight_decoder.h
  11. +11
    -8
      mindspore/lite/tools/converter/quantizer/debug_info_manager.cc
  12. +5
    -4
      mindspore/lite/tools/converter/quantizer/debug_info_manager.h

+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/gather_infer.c View File

@@ -31,7 +31,7 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
const TensorC *indices = inputs[1];
TensorC *output = outputs[0];
output->data_type_ = input->data_type_;
if (parameter->quant_type_ == QuantType_QUANT_WEIGHT) {
if (parameter->quant_type_ == QuantType_QUANT_WEIGHT || parameter->quant_type_ == QuantType_QUANT_DYNAMIC) {
output->data_type_ = kNumberTypeFloat32;
}
output->format_ = input->format_;


+ 38
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.c View File

@@ -0,0 +1,38 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "nnacl/int8/dynamic_gather_int8.h"
#include "nnacl/op_base.h"

// Per-channel dynamic-quant gather: copies the rows selected by `indices`
// from int8 `input` (viewed as [outer_size, limit, inner_size]) and
// dequantizes each element to float with that row's scale / zero point.
//
// input                : quantized source data.
// outer_size           : product of dims before the gather axis.
// inner_size           : product of dims after the gather axis.
// limit                : size of the gather axis (valid index range).
// indices              : indices along the gather axis; negative values wrap
//                        by adding `limit` (standard Gather semantics).
// indices_element_size : number of indices.
// output               : float destination, [outer_size, indices_element_size, inner_size].
// scale_in / zp_in     : per-channel quant params, indexed by the wrapped index.
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
                   int indices_element_size, float *output, const float *scale_in, const int *zp_in) {
  for (int m = 0; m < outer_size; ++m) {
    const int8_t *src_m = input + inner_size * m * limit;
    float *out_m = output + inner_size * m * indices_element_size;
    for (int i = 0; i < indices_element_size; ++i) {
      int index = indices[i];
      index = index < 0 ? index + limit : index;
      const float scale = scale_in[index];
      const int zp = zp_in[index];
      float *out = out_m + i * inner_size;
      const int8_t *src = src_m + index * inner_size;
      for (int j = 0; j < inner_size; ++j) {
        out[j] = (src[j] - zp) * scale;
      }
    }
  }
}

+ 32
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.h View File

@@ -0,0 +1,32 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_

#include "nnacl/op_base.h"
#include "nnacl/int8/quantize.h"

#ifdef __cplusplus
extern "C" {
#endif
// Gathers the rows selected by `indices` from per-channel quantized int8
// `input` (laid out as [outer_size, limit, inner_size]) and dequantizes them
// into float `output` using scale_in[index] / zp_in[index].
// Negative indices wrap by adding `limit` (standard Gather semantics).
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
int indices_element_size, float *output, const float *scale_in, const int *zp_in);
#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_

+ 5
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/quantize.h View File

@@ -89,6 +89,11 @@ typedef struct GatherQuantArg {
int zp_out_;
} GatherQuantArg;

// Per-channel quant params for dynamically quantized Gather: one
// (scale, zero point) pair per channel along the gather axis, indexed by
// the gathered index.
typedef struct DynamicGatherQuantArg {
float *scale_in_;  // per-channel dequantization scales
int *zp_in_;       // per-channel zero points
} DynamicGatherQuantArg;

typedef struct SoftmaxQuantArg {
QuantArg in_quant_args_;
QuantArg out_quant_arg_;


+ 215
- 0
mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.cc View File

@@ -0,0 +1,215 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/dynamic_gather_int8.h"
#include <limits>
#include "nnacl/gather_parameter.h"
#include "nnacl/int8/dynamic_gather_int8.h"
#include "nnacl/int8/quantize.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Gather;

namespace mindspore::kernel {
// Releases the per-channel quant-param snapshot allocated in Prepare().
DynamicGatherInt8CPUKernel::~DynamicGatherInt8CPUKernel() {
  if (quant_param_ == nullptr) {
    return;
  }
  if (quant_param_->zp_in_ != nullptr) {
    free(quant_param_->zp_in_);
    quant_param_->zp_in_ = nullptr;
  }
  if (quant_param_->scale_in_ != nullptr) {
    free(quant_param_->scale_in_);
    quant_param_->scale_in_ = nullptr;
  }
  free(quant_param_);
  quant_param_ = nullptr;
}

// Resolves the gather axis (from const input[2] when present, otherwise the
// op parameter), requires the weight tensor (input[0]) to be const, and
// snapshots its per-channel quant params into quant_param_.
// quant_param_ and its arrays are released in the destructor, including on
// the early-error paths below.
int DynamicGatherInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
// Three inputs means an explicit axis tensor; its data must be available
// (const) at Prepare time — the kernel creator enforces this.
if (in_tensors_.size() == kInputSize2) {
auto axis_data = reinterpret_cast<int *>(in_tensors_.at(C2NUM)->data());
if (axis_data == nullptr) {
MS_LOG(ERROR) << "DynamicGatherInt8CPUKernel input[2] data nullptr.";
return RET_ERROR;
}
axis_ = *axis_data;
} else {
axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
}
auto input_tensor = in_tensors_.at(0);
// Dynamic-quant gather dequantizes a quantized weight; a non-const input
// would have no stored per-channel quant params to read.
if (!input_tensor->IsConst()) {
MS_LOG(ERROR) << "Does not support tensor0 is non-const.";
return RET_ERROR;
}

auto in_quant_args = input_tensor->quant_params();
quant_param_ = reinterpret_cast<DynamicGatherQuantArg *>(malloc(sizeof(DynamicGatherQuantArg)));
if (quant_param_ == nullptr) {
MS_LOG(ERROR) << "Malloc DynamicGatherQuantArg for dynamic gather int8 op failed!";
return RET_ERROR;
}
memset(quant_param_, 0, sizeof(DynamicGatherQuantArg));
// One quant param per channel along the gather axis; bound the count before
// sizing the mallocs below.
auto channel_num = in_quant_args.size();
if (channel_num == 0 || channel_num > MAX_MALLOC_SIZE) {
MS_LOG(ERROR) << "channel_num must large than 0 and less than 2G.";
return RET_ERROR;
}
quant_param_->scale_in_ = reinterpret_cast<float *>(malloc(channel_num * sizeof(float)));
CHECK_NULL_RETURN(quant_param_->scale_in_);
quant_param_->zp_in_ = reinterpret_cast<int32_t *>(malloc(channel_num * sizeof(int32_t)));
CHECK_NULL_RETURN(quant_param_->zp_in_);
// Copy scales/zero points into flat arrays consumed by DynamicGather().
for (size_t i = 0; i < channel_num; ++i) {
quant_param_->scale_in_[i] = in_quant_args.at(i).scale;
quant_param_->zp_in_[i] = in_quant_args.at(i).zeroPoint;
}
if (!InferShapeDone()) {
return RET_OK;
}

return ReSize();
}

int DynamicGatherInt8CPUKernel::ReSize() {
auto input_tensor = in_tensors_.at(0);
auto indices_tensor = in_tensors_.at(1);
auto in_shape = input_tensor->shape();
int in_rank = in_shape.size();
MS_CHECK_LT(axis_, in_rank, RET_ERROR);
limit_ = in_shape.at(axis_);
outer_size_ = 1;
for (int i = 0; i < axis_; ++i) {
outer_size_ *= in_shape.at(i);
}
inner_size_ = 1;
for (int i = axis_ + 1; i < in_rank; ++i) {
inner_size_ *= in_shape.at(i);
}
indices_element_size_ = indices_tensor->ElementsNum();
return RET_OK;
}

int DynamicGatherInt8CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor,
int limit) {
if (!isIndicesInt32) {
if (indices_num >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
MS_LOG(ERROR) << "Input indices_num is invalid, indices_num: " << indices_num;
return RET_ERROR;
}
indices_data_ = reinterpret_cast<int32_t *>(ms_context_->allocator->Malloc(sizeof(int32_t) * indices_num));
if (indices_data_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
return RET_ERROR;
}
switch (indices_tensor->data_type()) {
case kNumberTypeInt64:
for (int i = 0; i < indices_num; i++) {
indices_data_[i] = static_cast<int>(reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i]);
if (indices_data_[i] >= limit) {
MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit;
return RET_ERROR;
}
}
break;
case kNumberTypeFloat:
case kNumberTypeFloat32:
for (int i = 0; i < indices_num; i++) {
indices_data_[i] = static_cast<int>(reinterpret_cast<float *>(indices_tensor->MutableData())[i]);
if (indices_data_[i] >= limit) {
MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit;
return RET_ERROR;
}
}
break;
default:
MS_LOG(ERROR) << "Does not support data type: " << indices_tensor->data_type();
return RET_ERROR;
}
} else {
indices_data_ = reinterpret_cast<int32_t *>(indices_tensor->MutableData());
for (int i = 0; i < limit; ++i) {
if (indices_data_[i] >= limit) {
MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit;
return RET_ERROR;
}
}
}
return RET_OK;
}

// Dequantizes and gathers one slice of the outer dimension for `task_id`.
// Work is split across threads along outer_size_ in chunks of `stride`.
int DynamicGatherInt8CPUKernel::DoGather(int task_id) {
auto input_tensor = in_tensors_.at(0);
auto indices_tensor = in_tensors_.at(1);
auto out_tensor = out_tensors_.at(0);

auto input_ptr = static_cast<int8_t *>(input_tensor->data());
CHECK_NULL_RETURN(input_ptr);
auto output_ptr = static_cast<float *>(out_tensor->data());
CHECK_NULL_RETURN(output_ptr);

// NOTE(review): this local equals indices_element_size_ set in ReSize();
// both are used below for the same quantity — confirm they can't diverge.
int indices_element_size = indices_tensor->ElementsNum();
MS_CHECK_GT(indices_element_size, 0, RET_ERROR);

// Each task handles at most `stride` outer rows; the last task may get fewer.
int stride = UP_DIV(outer_size_, thread_count_);
int outer_size = MSMIN(stride, outer_size_ - stride * task_id);
auto thread_stride = stride * task_id;

// Advance both pointers to this task's first outer row: the input row is
// [limit_ x inner_size_], the output row is [indices x inner_size_].
input_ptr += thread_stride * inner_size_ * limit_;
output_ptr += thread_stride * inner_size_ * indices_element_size;
DynamicGather(input_ptr, outer_size, inner_size_, limit_, indices_data_, indices_element_size_, output_ptr,
quant_param_->scale_in_, quant_param_->zp_in_);
return RET_OK;
}

int DynamicGather8Run(void *cdata, int task_id, float, float) {
auto gather_kernel = reinterpret_cast<DynamicGatherInt8CPUKernel *>(cdata);
auto error_code = gather_kernel->DoGather(task_id);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "GatherRun error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}

// Converts/validates the indices, then launches the gather across threads.
// The converted index buffer (owned only when the indices were not already
// int32) is released on both the success and the failure path.
int DynamicGatherInt8CPUKernel::Run() {
  auto indices_tensor = in_tensors_.at(1);

  int indices_num = indices_tensor->ElementsNum();
  bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32;
  int ret = AssignIndicesData(isIndicesInt32, indices_num, indices_tensor, limit_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "AssignIndicesData failed, error_code[" << ret << "]";
    return ret;
  }

  int error_code = ParallelLaunch(this->ms_context_, DynamicGather8Run, this, thread_count_);
  // Free before checking the launch result: returning on error without
  // freeing would leak the buffer allocated in AssignIndicesData().
  if (!isIndicesInt32) {
    ms_context_->allocator->Free(indices_data_);
    indices_data_ = nullptr;
  }
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
} // namespace mindspore::kernel

+ 53
- 0
mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.h View File

@@ -0,0 +1,53 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_

#include <vector>
#include "nnacl/gather_parameter.h"
#include "nnacl/int8/quantize.h"
#include "src/inner_kernel.h"

namespace mindspore::kernel {
// CPU kernel for Gather over a per-channel dynamically quantized (int8)
// const weight: gathers along `axis_` and dequantizes the result to float32.
class DynamicGatherInt8CPUKernel : public InnerKernel {
public:
DynamicGatherInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {}
~DynamicGatherInt8CPUKernel() override;

int Prepare() override;
int ReSize() override;
int Run() override;
// Executes one thread's slice of the gather (called via ParallelLaunch).
int DoGather(int task_id);

private:
// Builds/validates an int32 view of the indices tensor in indices_data_.
int AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor, int limit);

private:
int thread_count_ = 0;            // parallelism for ParallelLaunch
int inner_size_ = 0;              // product of dims after the gather axis
int limit_ = 0;                   // extent of the gather axis
int outer_size_ = 0;              // product of dims before the gather axis
int axis_ = 0;                    // gather axis (from input[2] or parameter)
int indices_element_size_ = 0;    // number of indices
// int32 view of the indices; owned only when converted from int64/float.
int *indices_data_ = nullptr;
DynamicGatherQuantArg *quant_param_ = nullptr;  // per-channel scales/zps (owned)
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_

+ 35
- 1
mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc View File

@@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/int8/gather_int8.h"
#include <vector>
#include "src/runtime/kernel/arm/int8/dynamic_gather_int8.h"
#include "nnacl/gather_parameter.h"
#include "nnacl/int8/gather_int8.h"
#include "nnacl/int8/quantize.h"
@@ -141,5 +142,38 @@ int GatherInt8CPUKernel::Run() {
return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Gather, LiteKernelCreator<GatherInt8CPUKernel>)
// Creator that dispatches int8 Gather to the right kernel by quant type:
// QUANT_ALL -> GatherInt8CPUKernel, QUANT_DYNAMIC -> DynamicGatherInt8CPUKernel.
// On every failure path after a non-null `parameter` the parameter is freed,
// matching the kernel-registry ownership convention used below.
kernel::InnerKernel *GatherInt8CPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                                const lite::Context *ctx, const kernel::KernelKey &desc) {
  if (parameter == nullptr) {
    MS_LOG(ERROR) << "parameter is nullptr.";
    return nullptr;
  }

  InnerKernel *kernel = nullptr;
  if (parameter->quant_type_ == schema::QuantType_QUANT_ALL) {
    kernel =
      new (std::nothrow) GatherInt8CPUKernel(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
  } else if (parameter->quant_type_ == schema::QuantType_QUANT_DYNAMIC) {
    // The dynamic kernel reads the axis from input[2] at Prepare time, so the
    // axis tensor (when present) must be const. The check is on the axis
    // tensor itself: size > 2 means input[2] exists.
    const size_t axis_index = 2;
    if (inputs.size() > axis_index && inputs.at(axis_index) != nullptr && !inputs.at(axis_index)->IsConst()) {
      MS_LOG(ERROR) << "kernel: " << parameter->name_ << " is unsupported Axis is not const.";
      free(parameter);
      return nullptr;
    }
    kernel = new (std::nothrow)
      DynamicGatherInt8CPUKernel(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
  } else {
    MS_LOG(ERROR) << "kernel: " << parameter->name_ << " is unsupported quant type:" << parameter->quant_type_;
    free(parameter);
    return nullptr;
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr.";
    free(parameter);
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Gather, GatherInt8CPUKernelCreator)
} // namespace mindspore::kernel

+ 8
- 1
mindspore/lite/src/train/train_export.cc View File

@@ -349,12 +349,19 @@ void TrainExport::PrepareRemap(int offset) {
int TrainExport::ExportTensor(const Model *model, const std::vector<mindspore::lite::Tensor *> &tensors, int offset,
const std::vector<std::pair<size_t, tensor_info>> &map_index,
const std::vector<std::string> &output_names, const std::set<size_t> &out_set) {
std::vector<mindspore::lite::Tensor *> in_tensors;
for (auto index : map_index) {
auto id = index.first;
size_t pid = id - static_cast<size_t>(offset);
mindspore::lite::Tensor *tensor = tensors.at(pid);
in_tensors.push_back(tensor);
}
for (auto index : map_index) {
auto id = index.first;
size_t pid = id - static_cast<size_t>(offset);
mindspore::lite::Tensor *tensor = tensors.at(pid);
schema::Tensor *scTensor = model->all_tensors_.at(pid);
auto preferred_dim = WeightDecoder::GetPreferredDim(index.second.op_parameter, index.second.input_index,
auto preferred_dim = WeightDecoder::GetPreferredDim(in_tensors, index.second.op_parameter, index.second.input_index,
tensor->shape(), model->version_);
auto tensorT = CreateTensor(tensor, scTensor, preferred_dim);
if (tensorT == nullptr) {


+ 2
- 27
mindspore/lite/src/weight_decoder.cc View File

@@ -19,7 +19,6 @@
#include "src/huffman_decode.h"
#include "tools/converter/quantizer/fse_decoder.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/gather_parameter.h"

namespace mindspore::lite {
namespace {
@@ -365,7 +364,7 @@ int WeightDecoder::DequantNode(OpParameter *op_parameter, const std::vector<Tens
int index = 0;
for (auto &tensor : in_tensors) {
MS_CHECK_TRUE_RET(tensor != nullptr, RET_ERROR);
auto preferred_dim = GetPreferredDim(op_parameter, index++, tensor->shape(), model_version);
auto preferred_dim = GetPreferredDim(in_tensors, op_parameter, index++, tensor->shape(), model_version);
auto ret = WeightDecoder::DequantTensor(tensor, preferred_dim, dst_data_type);
if (ret != RET_OK && ret != RET_NO_CHANGE) {
MS_LOG(DEBUG) << "Dequant tensor failed";
@@ -431,13 +430,7 @@ int WeightDecoder::GetDeConvPreferredDim(const OpParameter *op_parameter, const
}
}

int WeightDecoder::GetGatherPreferredDim(const OpParameter *op_parameter) {
MS_ASSERT(op_parameter != nullptr);
const auto *param = reinterpret_cast<const GatherParameter *>(op_parameter);
return param->axis_;
}

bool IsChannelFirst(int index, const OpParameter *op_parameter) {
bool WeightDecoder::IsChannelFirst(int index, const OpParameter *op_parameter) {
MS_ASSERT(op_parameter != nullptr);
if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter);
@@ -450,24 +443,6 @@ bool IsChannelFirst(int index, const OpParameter *op_parameter) {
return true;
}

int WeightDecoder::GetPreferredDim(const OpParameter *op_parameter, int index, const std::vector<int> &dims,
const std::string &model_version) {
const int first_version_offset = 5;
if (model_version.empty() ||
model_version.substr(model_version.size() - first_version_offset, model_version.size()) < "1.6.0") {
return IsChannelFirst(index, op_parameter) ? 0 : 1;
}
if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
return GetMatMulPreferredDim(op_parameter, index, dims);
} else if (op_parameter->type_ == schema::PrimitiveType_Conv2dTransposeFusion) {
return 0;
} else if (op_parameter->type_ == schema::PrimitiveType_Gather) {
return GetGatherPreferredDim(op_parameter);
}
// The first index.
return 0;
}

bool NeedBitUppackCheck(const SchemaTensorWrapper &src_tensor) {
MS_ASSERT(src_tensor.handler() != nullptr);
MS_ASSERT(src_tensor.data() != nullptr);


+ 41
- 5
mindspore/lite/src/weight_decoder.h View File

@@ -25,6 +25,7 @@
#include <string>
#include <cmath>
#include "nnacl/matmul_parameter.h"
#include "nnacl/gather_parameter.h"
#include "src/lite_kernel.h"
#include "src/common/utils.h"
#include "src/tensor.h"
@@ -137,8 +138,24 @@ class WeightDecoder {

static int UnPack(const SchemaTensorWrapper &src_tensor, lite::Tensor *dst_tensor);

static int GetPreferredDim(const OpParameter *op_parameter, int index, const std::vector<int> &dims,
const std::string &model_version);
// Returns the dimension along which per-channel quant params are laid out
// for the given op. Models exported before 1.6.0 used a channel-first/last
// heuristic; newer models pick the dim per primitive type.
template <typename T>
static int GetPreferredDim(const std::vector<T *> &in_tensors, const OpParameter *op_parameter, int index,
                           const std::vector<int> &dims, const std::string &model_version) {
  const size_t first_version_offset = 5;  // length of an "x.y.z" suffix
  // Treat versions too short to carry an "x.y.z" suffix as legacy: with a
  // 1..4-char string, size() - first_version_offset would wrap and make
  // substr() throw std::out_of_range.
  // NOTE(review): this is a lexicographic compare, so e.g. "1.10.0" sorts
  // before "1.6.0" — confirm intended for the version scheme in use.
  if (model_version.size() < first_version_offset ||
      model_version.substr(model_version.size() - first_version_offset) < "1.6.0") {
    return IsChannelFirst(index, op_parameter) ? 0 : 1;
  }
  if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
    return GetMatMulPreferredDim(op_parameter, index, dims);
  } else if (op_parameter->type_ == schema::PrimitiveType_Conv2dTransposeFusion) {
    return 0;
  } else if (op_parameter->type_ == schema::PrimitiveType_Gather) {
    return GetGatherPreferredDim(op_parameter, in_tensors);
  }
  // Default: quantize along the first dimension.
  return 0;
}

template <typename ST, typename DT = float>
static DT *DequantData(const lite::Tensor *input_tensor, int preferred_dim) {
@@ -164,6 +181,8 @@ class WeightDecoder {

static int DecodeHuffmanCode(const SchemaTensorWrapper &src_tensor, lite::Tensor *dst_tensor);

static bool IsChannelFirst(int index, const OpParameter *op_parameter);

template <typename ST, typename DT = float>
static DT *DequantPerLayerData(const lite::Tensor *input_tensor, const ST *quant_datas) {
auto quant_param = input_tensor->quant_params();
@@ -244,7 +263,23 @@ class WeightDecoder {

static int GetMatMulPreferredDim(const OpParameter *op_parameter, int input_index, const std::vector<int> &dims);
static int GetDeConvPreferredDim(const OpParameter *op_parameter, const std::vector<int> &dims);
static int GetGatherPreferredDim(const OpParameter *op_parameter);

template <typename T>
static int GetGatherPreferredDim(const OpParameter *op_parameter, const std::vector<T *> &in_tensors) {
MS_ASSERT(op_parameter != nullptr);
const int axis_index = 2;
const int axis_tensor_size = 3;
if (in_tensors.size() == axis_tensor_size && in_tensors.at(axis_index)->IsConst()) {
if (in_tensors.at(axis_index)->data_type() == kNumberTypeInt32) {
return static_cast<int *>(in_tensors.at(axis_index)->data())[0];
} else if (in_tensors.at(axis_index)->data_type() == kNumberTypeInt64) {
return static_cast<int64_t *>(in_tensors.at(axis_index)->data())[0];
}
}
const auto *param = reinterpret_cast<const GatherParameter *>(op_parameter);
return param->axis_;
}

static int DequantWeight(lite::Tensor *input_tensor, int preferred_dim, TypeId dst_data_type = kNumberTypeFloat32);

template <typename T1, typename T2>
@@ -253,13 +288,14 @@ class WeightDecoder {
T2 uint_result = 0;
T1 result;
UnPackFromUintToOrigin<T2>(packed_data, unpack_bit_data);
const int base = 2;
while (static_cast<int>(unpack_bit_data->size()) >= origin_bit) {
for (int k = 0; k < origin_bit; k++) {
bool bit_tmp = unpack_bit_data->front();
uint_result = (static_cast<size_t>(bit_tmp) << static_cast<unsigned int>(k)) + uint_result;
unpack_bit_data->pop();
}
result = uint_result - static_cast<T2>(pow(2, origin_bit - 1));
result = uint_result - static_cast<T2>(pow(base, origin_bit - 1));
(static_cast<T1 *>(unpack_int))[*count] = result;
uint_result = 0;
(*count)++;
@@ -271,7 +307,7 @@ class WeightDecoder {
uint_result = (static_cast<unsigned int>(bit) << i) + uint_result;
unpack_bit_data->pop();
}
result = static_cast<T1>(uint_result - static_cast<T2>(pow(2, origin_bit - 1)));
result = static_cast<T1>(uint_result - static_cast<T2>(pow(base, origin_bit - 1)));
(static_cast<T1 *>(unpack_int))[*count] = result;
}
}


+ 11
- 8
mindspore/lite/tools/converter/quantizer/debug_info_manager.cc View File

@@ -193,10 +193,11 @@ int DebugInfoManager::SetOriginStaticInfo(QuantDebugInfo *quant_debug_info, cons
return RET_OK;
}

int DebugInfoManager::SetQuantStaticInfo(OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info,
int DebugInfoManager::SetQuantStaticInfo(const std::vector<mindspore::tensor::MSTensor *> &inputs,
OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info,
const mindspore::lite::Tensor &tensor) {
auto preferred_dim =
mindspore::lite::WeightDecoder::GetPreferredDim(op_parameter, tensor_index, tensor.shape(), Version());
mindspore::lite::WeightDecoder::GetPreferredDim(inputs, op_parameter, tensor_index, tensor.shape(), Version());
float *quant_data;
if (tensor.data_type() == kNumberTypeInt8) {
quant_data = mindspore::lite::WeightDecoder::DequantData<int8_t, float>(&tensor, preferred_dim);
@@ -266,8 +267,10 @@ int DebugInfoManager::AddOriginInfo(const mindspore::CallBackParam &call_back_pa
return RET_OK;
}

int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter,
bool is_input, int tensor_index, mindspore::lite::Tensor *compared_tensor) {
int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_param,
const std::vector<mindspore::tensor::MSTensor *> &inputs,
OpParameter *op_parameter, bool is_input, int tensor_index,
mindspore::lite::Tensor *compared_tensor) {
CHECK_NULL_RETURN(op_parameter);
CHECK_NULL_RETURN(compared_tensor);
QuantDebugInfo compared_debug_info;
@@ -280,7 +283,7 @@ int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_
auto is_const = compared_tensor->category() == CONST_TENSOR || compared_tensor->category() == CONST_SCALAR;
compared_debug_info.tensor_type_flag = is_const ? WEIGHT : ACTIVATION;
if (!compared_tensor->quant_params().empty()) {
auto ret = SetQuantStaticInfo(op_parameter, tensor_index, &compared_debug_info, *compared_tensor);
auto ret = SetQuantStaticInfo(inputs, op_parameter, tensor_index, &compared_debug_info, *compared_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << compared_tensor->tensor_name() << " get quant static info failed.";
return RET_ERROR;
@@ -435,13 +438,13 @@ KernelCallBack DebugInfoManager::GetQuantBeforeCallBack(
MS_LOG(ERROR) << tensor->tensor_name() << " get const tensor failed.";
return false;
}
ret = AddComparedInfo(call_param, op_parameters.at(call_param.node_name), true, i, &new_tensor);
ret = AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), true, i, &new_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << tensor->tensor_name() << " add compared info failed.";
return false;
}
} else {
auto ret = AddComparedInfo(call_param, op_parameters.at(call_param.node_name), true, i,
auto ret = AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), true, i,
static_cast<mindspore::lite::Tensor *>(tensor));
if (ret != RET_OK) {
MS_LOG(ERROR) << tensor->tensor_name() << " add compared info failed.";
@@ -494,7 +497,7 @@ KernelCallBack DebugInfoManager::GetAfterCallBack(const std::map<std::string, Op
// all outputs are same dtype.
for (size_t i = 0; i < outputs.size(); ++i) {
auto tensor = outputs.at(i);
AddComparedInfo(call_param, op_parameters.at(call_param.node_name), false, i,
AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), false, i,
static_cast<mindspore::lite::Tensor *>(tensor));
}
return true;


+ 5
- 4
mindspore/lite/tools/converter/quantizer/debug_info_manager.h View File

@@ -91,8 +91,9 @@ class DebugInfoManager {
int AddOriginInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, bool is_input,
int tensor_index, mindspore::lite::Tensor *origin_tensor);

int AddComparedInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, bool is_input,
int tensor_index, mindspore::lite::Tensor *compared_tensor);
int AddComparedInfo(const mindspore::CallBackParam &call_back_param,
const std::vector<mindspore::tensor::MSTensor *> &inputs, OpParameter *op_parameter,
bool is_input, int tensor_index, mindspore::lite::Tensor *compared_tensor);

void PrintAllDebugInfo();

@@ -100,8 +101,8 @@ class DebugInfoManager {

int SetOriginStaticInfo(QuantDebugInfo *quant_debug_info, const mindspore::lite::Tensor &tensor);

int SetQuantStaticInfo(OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info,
const mindspore::lite::Tensor &tensor);
int SetQuantStaticInfo(const std::vector<mindspore::tensor::MSTensor *> &inputs, OpParameter *op_parameter,
int tensor_index, QuantDebugInfo *quant_debug_info, const mindspore::lite::Tensor &tensor);

std::string ParseDataTypeFlagToString(DataTypeFlag data_type_flag);



Loading…
Cancel
Save