Browse Source

!30025 support dynamic quant gather

Merge pull request !30025 from yeyunpeng2020/dynamic_quant_success
feature/build-system-rewrite
i-robot Gitee 4 years ago
parent
commit
7389df06af
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
12 changed files with 446 additions and 47 deletions
  1. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/gather_infer.c
  2. +38
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.c
  3. +32
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.h
  4. +5
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/quantize.h
  5. +215
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.cc
  6. +53
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.h
  7. +35
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc
  8. +8
    -1
      mindspore/lite/src/train/train_export.cc
  9. +2
    -27
      mindspore/lite/src/weight_decoder.cc
  10. +41
    -5
      mindspore/lite/src/weight_decoder.h
  11. +11
    -8
      mindspore/lite/tools/converter/quantizer/debug_info_manager.cc
  12. +5
    -4
      mindspore/lite/tools/converter/quantizer/debug_info_manager.h

+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/gather_infer.c View File

@@ -31,7 +31,7 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
const TensorC *indices = inputs[1];
TensorC *output = outputs[0];
output->data_type_ = input->data_type_;
if (parameter->quant_type_ == QuantType_QUANT_WEIGHT) {
if (parameter->quant_type_ == QuantType_QUANT_WEIGHT || parameter->quant_type_ == QuantType_QUANT_DYNAMIC) {
output->data_type_ = kNumberTypeFloat32;
}
output->format_ = input->format_;


+ 38
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.c View File

@@ -0,0 +1,38 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "nnacl/int8/dynamic_gather_int8.h"
#include "nnacl/op_base.h"

// Per-channel dynamic-quant gather: copies the rows selected by `indices`
// from int8 `input` (viewed as [outer_size, limit, inner_size]) and
// dequantizes each element to float with that row's scale / zero point.
//
// input                : quantized source data.
// outer_size           : product of dims before the gather axis.
// inner_size           : product of dims after the gather axis.
// limit                : size of the gather axis (valid index range).
// indices              : indices along the gather axis; negative values wrap
//                        by adding `limit` (standard Gather semantics).
// indices_element_size : number of indices.
// output               : float destination, [outer_size, indices_element_size, inner_size].
// scale_in / zp_in     : per-channel quant params, indexed by the wrapped index.
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
                   int indices_element_size, float *output, const float *scale_in, const int *zp_in) {
  for (int m = 0; m < outer_size; ++m) {
    const int8_t *src_m = input + inner_size * m * limit;
    float *out_m = output + inner_size * m * indices_element_size;
    for (int i = 0; i < indices_element_size; ++i) {
      int index = indices[i];
      index = index < 0 ? index + limit : index;
      const float scale = scale_in[index];
      const int zp = zp_in[index];
      float *out = out_m + i * inner_size;
      const int8_t *src = src_m + index * inner_size;
      for (int j = 0; j < inner_size; ++j) {
        out[j] = (src[j] - zp) * scale;
      }
    }
  }
}

+ 32
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/dynamic_gather_int8.h View File

@@ -0,0 +1,32 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_

#include "nnacl/op_base.h"
#include "nnacl/int8/quantize.h"

#ifdef __cplusplus
extern "C" {
#endif
// Gathers the rows selected by `indices` from per-channel quantized int8
// `input` (laid out as [outer_size, limit, inner_size]) and dequantizes them
// into float `output` using scale_in[index] / zp_in[index].
// Negative indices wrap by adding `limit` (standard Gather semantics).
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
int indices_element_size, float *output, const float *scale_in, const int *zp_in);
#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_

+ 5
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/int8/quantize.h View File

@@ -89,6 +89,11 @@ typedef struct GatherQuantArg {
int zp_out_;
} GatherQuantArg;

// Per-channel quant params for dynamically quantized Gather: one
// (scale, zero point) pair per channel along the gather axis, indexed by
// the gathered index.
typedef struct DynamicGatherQuantArg {
float *scale_in_;  // per-channel dequantization scales
int *zp_in_;       // per-channel zero points
} DynamicGatherQuantArg;

typedef struct SoftmaxQuantArg {
QuantArg in_quant_args_;
QuantArg out_quant_arg_;


+ 215
- 0
mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.cc View File

@@ -0,0 +1,215 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/dynamic_gather_int8.h"
#include <limits>
#include "nnacl/gather_parameter.h"
#include "nnacl/int8/dynamic_gather_int8.h"
#include "nnacl/int8/quantize.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Gather;

namespace mindspore::kernel {
// Releases the per-channel quant-param snapshot allocated in Prepare().
DynamicGatherInt8CPUKernel::~DynamicGatherInt8CPUKernel() {
  if (quant_param_ == nullptr) {
    return;
  }
  if (quant_param_->zp_in_ != nullptr) {
    free(quant_param_->zp_in_);
    quant_param_->zp_in_ = nullptr;
  }
  if (quant_param_->scale_in_ != nullptr) {
    free(quant_param_->scale_in_);
    quant_param_->scale_in_ = nullptr;
  }
  free(quant_param_);
  quant_param_ = nullptr;
}

// Resolves the gather axis (from const input[2] when present, otherwise the
// op parameter), requires the weight tensor (input[0]) to be const, and
// snapshots its per-channel quant params into quant_param_.
// quant_param_ and its arrays are released in the destructor, including on
// the early-error paths below.
int DynamicGatherInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
// Three inputs means an explicit axis tensor; its data must be available
// (const) at Prepare time — the kernel creator enforces this.
if (in_tensors_.size() == kInputSize2) {
auto axis_data = reinterpret_cast<int *>(in_tensors_.at(C2NUM)->data());
if (axis_data == nullptr) {
MS_LOG(ERROR) << "DynamicGatherInt8CPUKernel input[2] data nullptr.";
return RET_ERROR;
}
axis_ = *axis_data;
} else {
axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
}
auto input_tensor = in_tensors_.at(0);
// Dynamic-quant gather dequantizes a quantized weight; a non-const input
// would have no stored per-channel quant params to read.
if (!input_tensor->IsConst()) {
MS_LOG(ERROR) << "Does not support tensor0 is non-const.";
return RET_ERROR;
}

auto in_quant_args = input_tensor->quant_params();
quant_param_ = reinterpret_cast<DynamicGatherQuantArg *>(malloc(sizeof(DynamicGatherQuantArg)));
if (quant_param_ == nullptr) {
MS_LOG(ERROR) << "Malloc DynamicGatherQuantArg for dynamic gather int8 op failed!";
return RET_ERROR;
}
memset(quant_param_, 0, sizeof(DynamicGatherQuantArg));
// One quant param per channel along the gather axis; bound the count before
// sizing the mallocs below.
auto channel_num = in_quant_args.size();
if (channel_num == 0 || channel_num > MAX_MALLOC_SIZE) {
MS_LOG(ERROR) << "channel_num must large than 0 and less than 2G.";
return RET_ERROR;
}
quant_param_->scale_in_ = reinterpret_cast<float *>(malloc(channel_num * sizeof(float)));
CHECK_NULL_RETURN(quant_param_->scale_in_);
quant_param_->zp_in_ = reinterpret_cast<int32_t *>(malloc(channel_num * sizeof(int32_t)));
CHECK_NULL_RETURN(quant_param_->zp_in_);
// Copy scales/zero points into flat arrays consumed by DynamicGather().
for (size_t i = 0; i < channel_num; ++i) {
quant_param_->scale_in_[i] = in_quant_args.at(i).scale;
quant_param_->zp_in_[i] = in_quant_args.at(i).zeroPoint;
}
if (!InferShapeDone()) {
return RET_OK;
}

return ReSize();
}

int DynamicGatherInt8CPUKernel::ReSize() {
auto input_tensor = in_tensors_.at(0);
auto indices_tensor = in_tensors_.at(1);
auto in_shape = input_tensor->shape();
int in_rank = in_shape.size();
MS_CHECK_LT(axis_, in_rank, RET_ERROR);
limit_ = in_shape.at(axis_);
outer_size_ = 1;
for (int i = 0; i < axis_; ++i) {
outer_size_ *= in_shape.at(i);
}
inner_size_ = 1;
for (int i = axis_ + 1; i < in_rank; ++i) {
inner_size_ *= in_shape.at(i);
}
indices_element_size_ = indices_tensor->ElementsNum();
return RET_OK;
}

int DynamicGatherInt8CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor,
int limit) {
if (!isIndicesInt32) {
if (indices_num >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
MS_LOG(ERROR) << "Input indices_num is invalid, indices_num: " << indices_num;
return RET_ERROR;
}
indices_data_ = reinterpret_cast<int32_t *>(ms_context_->allocator->Malloc(sizeof(int32_t) * indices_num));
if (indices_data_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
return RET_ERROR;
}
switch (indices_tensor->data_type()) {
case kNumberTypeInt64:
for (int i = 0; i < indices_num; i++) {
indices_data_[i] = static_cast<int>(reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i]);
if (indices_data_[i] >= limit) {
MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit;
return RET_ERROR;
}
}
break;
case kNumberTypeFloat:
case kNumberTypeFloat32:
for (int i = 0; i < indices_num; i++) {
indices_data_[i] = static_cast<int>(reinterpret_cast<float *>(indices_tensor->MutableData())[i]);
if (indices_data_[i] >= limit) {
MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit;
return RET_ERROR;
}
}
break;
default:
MS_LOG(ERROR) << "Does not support data type: " << indices_tensor->data_type();
return RET_ERROR;
}
} else {
indices_data_ = reinterpret_cast<int32_t *>(indices_tensor->MutableData());
for (int i = 0; i < limit; ++i) {
if (indices_data_[i] >= limit) {
MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit;
return RET_ERROR;
}
}
}
return RET_OK;
}

// Dequantizes and gathers one slice of the outer dimension for `task_id`.
// Work is split across threads along outer_size_ in chunks of `stride`.
int DynamicGatherInt8CPUKernel::DoGather(int task_id) {
auto input_tensor = in_tensors_.at(0);
auto indices_tensor = in_tensors_.at(1);
auto out_tensor = out_tensors_.at(0);

auto input_ptr = static_cast<int8_t *>(input_tensor->data());
CHECK_NULL_RETURN(input_ptr);
auto output_ptr = static_cast<float *>(out_tensor->data());
CHECK_NULL_RETURN(output_ptr);

// NOTE(review): this local equals indices_element_size_ set in ReSize();
// both are used below for the same quantity — confirm they can't diverge.
int indices_element_size = indices_tensor->ElementsNum();
MS_CHECK_GT(indices_element_size, 0, RET_ERROR);

// Each task handles at most `stride` outer rows; the last task may get fewer.
int stride = UP_DIV(outer_size_, thread_count_);
int outer_size = MSMIN(stride, outer_size_ - stride * task_id);
auto thread_stride = stride * task_id;

// Advance both pointers to this task's first outer row: the input row is
// [limit_ x inner_size_], the output row is [indices x inner_size_].
input_ptr += thread_stride * inner_size_ * limit_;
output_ptr += thread_stride * inner_size_ * indices_element_size;
DynamicGather(input_ptr, outer_size, inner_size_, limit_, indices_data_, indices_element_size_, output_ptr,
quant_param_->scale_in_, quant_param_->zp_in_);
return RET_OK;
}

int DynamicGather8Run(void *cdata, int task_id, float, float) {
auto gather_kernel = reinterpret_cast<DynamicGatherInt8CPUKernel *>(cdata);
auto error_code = gather_kernel->DoGather(task_id);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "GatherRun error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;
}
return RET_OK;
}

// Converts/validates the indices, then launches the gather across threads.
// The converted index buffer (owned only when the indices were not already
// int32) is released on both the success and the failure path.
int DynamicGatherInt8CPUKernel::Run() {
  auto indices_tensor = in_tensors_.at(1);

  int indices_num = indices_tensor->ElementsNum();
  bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32;
  int ret = AssignIndicesData(isIndicesInt32, indices_num, indices_tensor, limit_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "AssignIndicesData failed, error_code[" << ret << "]";
    return ret;
  }

  int error_code = ParallelLaunch(this->ms_context_, DynamicGather8Run, this, thread_count_);
  // Free before checking the launch result: returning on error without
  // freeing would leak the buffer allocated in AssignIndicesData().
  if (!isIndicesInt32) {
    ms_context_->allocator->Free(indices_data_);
    indices_data_ = nullptr;
  }
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
} // namespace mindspore::kernel

+ 53
- 0
mindspore/lite/src/runtime/kernel/arm/int8/dynamic_gather_int8.h View File

@@ -0,0 +1,53 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_

#include <vector>
#include "nnacl/gather_parameter.h"
#include "nnacl/int8/quantize.h"
#include "src/inner_kernel.h"

namespace mindspore::kernel {
// CPU kernel for Gather over a per-channel dynamically quantized (int8)
// const weight: gathers along `axis_` and dequantizes the result to float32.
class DynamicGatherInt8CPUKernel : public InnerKernel {
public:
DynamicGatherInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {}
~DynamicGatherInt8CPUKernel() override;

int Prepare() override;
int ReSize() override;
int Run() override;
// Executes one thread's slice of the gather (called via ParallelLaunch).
int DoGather(int task_id);

private:
// Builds/validates an int32 view of the indices tensor in indices_data_.
int AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor, int limit);

private:
int thread_count_ = 0;            // parallelism for ParallelLaunch
int inner_size_ = 0;              // product of dims after the gather axis
int limit_ = 0;                   // extent of the gather axis
int outer_size_ = 0;              // product of dims before the gather axis
int axis_ = 0;                    // gather axis (from input[2] or parameter)
int indices_element_size_ = 0;    // number of indices
// int32 view of the indices; owned only when converted from int64/float.
int *indices_data_ = nullptr;
DynamicGatherQuantArg *quant_param_ = nullptr;  // per-channel scales/zps (owned)
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_

+ 35
- 1
mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc View File

@@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/int8/gather_int8.h"
#include <vector>
#include "src/runtime/kernel/arm/int8/dynamic_gather_int8.h"
#include "nnacl/gather_parameter.h"
#include "nnacl/int8/gather_int8.h"
#include "nnacl/int8/quantize.h"
@@ -141,5 +142,38 @@ int GatherInt8CPUKernel::Run() {
return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Gather, LiteKernelCreator<GatherInt8CPUKernel>)
// Creator that dispatches int8 Gather to the right kernel by quant type:
// QUANT_ALL -> GatherInt8CPUKernel, QUANT_DYNAMIC -> DynamicGatherInt8CPUKernel.
// On every failure path after a non-null `parameter` the parameter is freed,
// matching the kernel-registry ownership convention used below.
kernel::InnerKernel *GatherInt8CPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                                const lite::Context *ctx, const kernel::KernelKey &desc) {
  if (parameter == nullptr) {
    MS_LOG(ERROR) << "parameter is nullptr.";
    return nullptr;
  }

  InnerKernel *kernel = nullptr;
  if (parameter->quant_type_ == schema::QuantType_QUANT_ALL) {
    kernel =
      new (std::nothrow) GatherInt8CPUKernel(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
  } else if (parameter->quant_type_ == schema::QuantType_QUANT_DYNAMIC) {
    // The dynamic kernel reads the axis from input[2] at Prepare time, so the
    // axis tensor (when present) must be const. The check is on the axis
    // tensor itself: size > 2 means input[2] exists.
    const size_t axis_index = 2;
    if (inputs.size() > axis_index && inputs.at(axis_index) != nullptr && !inputs.at(axis_index)->IsConst()) {
      MS_LOG(ERROR) << "kernel: " << parameter->name_ << " is unsupported Axis is not const.";
      free(parameter);
      return nullptr;
    }
    kernel = new (std::nothrow)
      DynamicGatherInt8CPUKernel(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
  } else {
    MS_LOG(ERROR) << "kernel: " << parameter->name_ << " is unsupported quant type:" << parameter->quant_type_;
    free(parameter);
    return nullptr;
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr.";
    free(parameter);
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Gather, GatherInt8CPUKernelCreator)
} // namespace mindspore::kernel

+ 8
- 1
mindspore/lite/src/train/train_export.cc View File

@@ -349,12 +349,19 @@ void TrainExport::PrepareRemap(int offset) {
int TrainExport::ExportTensor(const Model *model, const std::vector<mindspore::lite::Tensor *> &tensors, int offset,
const std::vector<std::pair<size_t, tensor_info>> &map_index,
const std::vector<std::string> &output_names, const std::set<size_t> &out_set) {
std::vector<mindspore::lite::Tensor *> in_tensors;
for (auto index : map_index) {
auto id = index.first;
size_t pid = id - static_cast<size_t>(offset);
mindspore::lite::Tensor *tensor = tensors.at(pid);
in_tensors.push_back(tensor);
}
for (auto index : map_index) {
auto id = index.first;
size_t pid = id - static_cast<size_t>(offset);
mindspore::lite::Tensor *tensor = tensors.at(pid);
schema::Tensor *scTensor = model->all_tensors_.at(pid);
auto preferred_dim = WeightDecoder::GetPreferredDim(index.second.op_parameter, index.second.input_index,
auto preferred_dim = WeightDecoder::GetPreferredDim(in_tensors, index.second.op_parameter, index.second.input_index,
tensor->shape(), model->version_);
auto tensorT = CreateTensor(tensor, scTensor, preferred_dim);
if (tensorT == nullptr) {


+ 2
- 27
mindspore/lite/src/weight_decoder.cc View File

@@ -19,7 +19,6 @@
#include "src/huffman_decode.h"
#include "tools/converter/quantizer/fse_decoder.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/gather_parameter.h"

namespace mindspore::lite {
namespace {
@@ -365,7 +364,7 @@ int WeightDecoder::DequantNode(OpParameter *op_parameter, const std::vector<Tens
int index = 0;
for (auto &tensor : in_tensors) {
MS_CHECK_TRUE_RET(tensor != nullptr, RET_ERROR);
auto preferred_dim = GetPreferredDim(op_parameter, index++, tensor->shape(), model_version);
auto preferred_dim = GetPreferredDim(in_tensors, op_parameter, index++, tensor->shape(), model_version);
auto ret = WeightDecoder::DequantTensor(tensor, preferred_dim, dst_data_type);
if (ret != RET_OK && ret != RET_NO_CHANGE) {
MS_LOG(DEBUG) << "Dequant tensor failed";
@@ -431,13 +430,7 @@ int WeightDecoder::GetDeConvPreferredDim(const OpParameter *op_parameter, const
}
}

int WeightDecoder::GetGatherPreferredDim(const OpParameter *op_parameter) {
MS_ASSERT(op_parameter != nullptr);
const auto *param = reinterpret_cast<const GatherParameter *>(op_parameter);
return param->axis_;
}

bool IsChannelFirst(int index, const OpParameter *op_parameter) {
bool WeightDecoder::IsChannelFirst(int index, const OpParameter *op_parameter) {
MS_ASSERT(op_parameter != nullptr);
if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter);
@@ -450,24 +443,6 @@ bool IsChannelFirst(int index, const OpParameter *op_parameter) {
return true;
}

int WeightDecoder::GetPreferredDim(const OpParameter *op_parameter, int index, const std::vector<int> &dims,
const std::string &model_version) {
const int first_version_offset = 5;
if (model_version.empty() ||
model_version.substr(model_version.size() - first_version_offset, model_version.size()) < "1.6.0") {
return IsChannelFirst(index, op_parameter) ? 0 : 1;
}
if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
return GetMatMulPreferredDim(op_parameter, index, dims);
} else if (op_parameter->type_ == schema::PrimitiveType_Conv2dTransposeFusion) {
return 0;
} else if (op_parameter->type_ == schema::PrimitiveType_Gather) {
return GetGatherPreferredDim(op_parameter);
}
// The first index.
return 0;
}

bool NeedBitUppackCheck(const SchemaTensorWrapper &src_tensor) {
MS_ASSERT(src_tensor.handler() != nullptr);
MS_ASSERT(src_tensor.data() != nullptr);


+ 41
- 5
mindspore/lite/src/weight_decoder.h View File

@@ -25,6 +25,7 @@
#include <string>
#include <cmath>
#include "nnacl/matmul_parameter.h"
#include "nnacl/gather_parameter.h"
#include "src/lite_kernel.h"
#include "src/common/utils.h"
#include "src/tensor.h"
@@ -137,8 +138,24 @@ class WeightDecoder {

static int UnPack(const SchemaTensorWrapper &src_tensor, lite::Tensor *dst_tensor);

static int GetPreferredDim(const OpParameter *op_parameter, int index, const std::vector<int> &dims,
const std::string &model_version);
// Returns the dimension along which per-channel quant params are laid out
// for the given op. Models exported before 1.6.0 used a channel-first/last
// heuristic; newer models pick the dim per primitive type.
template <typename T>
static int GetPreferredDim(const std::vector<T *> &in_tensors, const OpParameter *op_parameter, int index,
                           const std::vector<int> &dims, const std::string &model_version) {
  const size_t first_version_offset = 5;  // length of an "x.y.z" suffix
  // Treat versions too short to carry an "x.y.z" suffix as legacy: with a
  // 1..4-char string, size() - first_version_offset would wrap and make
  // substr() throw std::out_of_range.
  // NOTE(review): this is a lexicographic compare, so e.g. "1.10.0" sorts
  // before "1.6.0" — confirm intended for the version scheme in use.
  if (model_version.size() < first_version_offset ||
      model_version.substr(model_version.size() - first_version_offset) < "1.6.0") {
    return IsChannelFirst(index, op_parameter) ? 0 : 1;
  }
  if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) {
    return GetMatMulPreferredDim(op_parameter, index, dims);
  } else if (op_parameter->type_ == schema::PrimitiveType_Conv2dTransposeFusion) {
    return 0;
  } else if (op_parameter->type_ == schema::PrimitiveType_Gather) {
    return GetGatherPreferredDim(op_parameter, in_tensors);
  }
  // Default: quantize along the first dimension.
  return 0;
}

template <typename ST, typename DT = float>
static DT *DequantData(const lite::Tensor *input_tensor, int preferred_dim) {
@@ -164,6 +181,8 @@ class WeightDecoder {

static int DecodeHuffmanCode(const SchemaTensorWrapper &src_tensor, lite::Tensor *dst_tensor);

static bool IsChannelFirst(int index, const OpParameter *op_parameter);

template <typename ST, typename DT = float>
static DT *DequantPerLayerData(const lite::Tensor *input_tensor, const ST *quant_datas) {
auto quant_param = input_tensor->quant_params();
@@ -244,7 +263,23 @@ class WeightDecoder {

static int GetMatMulPreferredDim(const OpParameter *op_parameter, int input_index, const std::vector<int> &dims);
static int GetDeConvPreferredDim(const OpParameter *op_parameter, const std::vector<int> &dims);
static int GetGatherPreferredDim(const OpParameter *op_parameter);

template <typename T>
static int GetGatherPreferredDim(const OpParameter *op_parameter, const std::vector<T *> &in_tensors) {
MS_ASSERT(op_parameter != nullptr);
const int axis_index = 2;
const int axis_tensor_size = 3;
if (in_tensors.size() == axis_tensor_size && in_tensors.at(axis_index)->IsConst()) {
if (in_tensors.at(axis_index)->data_type() == kNumberTypeInt32) {
return static_cast<int *>(in_tensors.at(axis_index)->data())[0];
} else if (in_tensors.at(axis_index)->data_type() == kNumberTypeInt64) {
return static_cast<int64_t *>(in_tensors.at(axis_index)->data())[0];
}
}
const auto *param = reinterpret_cast<const GatherParameter *>(op_parameter);
return param->axis_;
}

static int DequantWeight(lite::Tensor *input_tensor, int preferred_dim, TypeId dst_data_type = kNumberTypeFloat32);

template <typename T1, typename T2>
@@ -253,13 +288,14 @@ class WeightDecoder {
T2 uint_result = 0;
T1 result;
UnPackFromUintToOrigin<T2>(packed_data, unpack_bit_data);
const int base = 2;
while (static_cast<int>(unpack_bit_data->size()) >= origin_bit) {
for (int k = 0; k < origin_bit; k++) {
bool bit_tmp = unpack_bit_data->front();
uint_result = (static_cast<size_t>(bit_tmp) << static_cast<unsigned int>(k)) + uint_result;
unpack_bit_data->pop();
}
result = uint_result - static_cast<T2>(pow(2, origin_bit - 1));
result = uint_result - static_cast<T2>(pow(base, origin_bit - 1));
(static_cast<T1 *>(unpack_int))[*count] = result;
uint_result = 0;
(*count)++;
@@ -271,7 +307,7 @@ class WeightDecoder {
uint_result = (static_cast<unsigned int>(bit) << i) + uint_result;
unpack_bit_data->pop();
}
result = static_cast<T1>(uint_result - static_cast<T2>(pow(2, origin_bit - 1)));
result = static_cast<T1>(uint_result - static_cast<T2>(pow(base, origin_bit - 1)));
(static_cast<T1 *>(unpack_int))[*count] = result;
}
}


+ 11
- 8
mindspore/lite/tools/converter/quantizer/debug_info_manager.cc View File

@@ -193,10 +193,11 @@ int DebugInfoManager::SetOriginStaticInfo(QuantDebugInfo *quant_debug_info, cons
return RET_OK;
}

int DebugInfoManager::SetQuantStaticInfo(OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info,
int DebugInfoManager::SetQuantStaticInfo(const std::vector<mindspore::tensor::MSTensor *> &inputs,
OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info,
const mindspore::lite::Tensor &tensor) {
auto preferred_dim =
mindspore::lite::WeightDecoder::GetPreferredDim(op_parameter, tensor_index, tensor.shape(), Version());
mindspore::lite::WeightDecoder::GetPreferredDim(inputs, op_parameter, tensor_index, tensor.shape(), Version());
float *quant_data;
if (tensor.data_type() == kNumberTypeInt8) {
quant_data = mindspore::lite::WeightDecoder::DequantData<int8_t, float>(&tensor, preferred_dim);
@@ -266,8 +267,10 @@ int DebugInfoManager::AddOriginInfo(const mindspore::CallBackParam &call_back_pa
return RET_OK;
}

int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter,
bool is_input, int tensor_index, mindspore::lite::Tensor *compared_tensor) {
int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_param,
const std::vector<mindspore::tensor::MSTensor *> &inputs,
OpParameter *op_parameter, bool is_input, int tensor_index,
mindspore::lite::Tensor *compared_tensor) {
CHECK_NULL_RETURN(op_parameter);
CHECK_NULL_RETURN(compared_tensor);
QuantDebugInfo compared_debug_info;
@@ -280,7 +283,7 @@ int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_
auto is_const = compared_tensor->category() == CONST_TENSOR || compared_tensor->category() == CONST_SCALAR;
compared_debug_info.tensor_type_flag = is_const ? WEIGHT : ACTIVATION;
if (!compared_tensor->quant_params().empty()) {
auto ret = SetQuantStaticInfo(op_parameter, tensor_index, &compared_debug_info, *compared_tensor);
auto ret = SetQuantStaticInfo(inputs, op_parameter, tensor_index, &compared_debug_info, *compared_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << compared_tensor->tensor_name() << " get quant static info failed.";
return RET_ERROR;
@@ -435,13 +438,13 @@ KernelCallBack DebugInfoManager::GetQuantBeforeCallBack(
MS_LOG(ERROR) << tensor->tensor_name() << " get const tensor failed.";
return false;
}
ret = AddComparedInfo(call_param, op_parameters.at(call_param.node_name), true, i, &new_tensor);
ret = AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), true, i, &new_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << tensor->tensor_name() << " add compared info failed.";
return false;
}
} else {
auto ret = AddComparedInfo(call_param, op_parameters.at(call_param.node_name), true, i,
auto ret = AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), true, i,
static_cast<mindspore::lite::Tensor *>(tensor));
if (ret != RET_OK) {
MS_LOG(ERROR) << tensor->tensor_name() << " add compared info failed.";
@@ -494,7 +497,7 @@ KernelCallBack DebugInfoManager::GetAfterCallBack(const std::map<std::string, Op
// all outputs are same dtype.
for (size_t i = 0; i < outputs.size(); ++i) {
auto tensor = outputs.at(i);
AddComparedInfo(call_param, op_parameters.at(call_param.node_name), false, i,
AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), false, i,
static_cast<mindspore::lite::Tensor *>(tensor));
}
return true;


+ 5
- 4
mindspore/lite/tools/converter/quantizer/debug_info_manager.h View File

@@ -91,8 +91,9 @@ class DebugInfoManager {
int AddOriginInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, bool is_input,
int tensor_index, mindspore::lite::Tensor *origin_tensor);

int AddComparedInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, bool is_input,
int tensor_index, mindspore::lite::Tensor *compared_tensor);
int AddComparedInfo(const mindspore::CallBackParam &call_back_param,
const std::vector<mindspore::tensor::MSTensor *> &inputs, OpParameter *op_parameter,
bool is_input, int tensor_index, mindspore::lite::Tensor *compared_tensor);

void PrintAllDebugInfo();

@@ -100,8 +101,8 @@ class DebugInfoManager {

int SetOriginStaticInfo(QuantDebugInfo *quant_debug_info, const mindspore::lite::Tensor &tensor);

int SetQuantStaticInfo(OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info,
const mindspore::lite::Tensor &tensor);
int SetQuantStaticInfo(const std::vector<mindspore::tensor::MSTensor *> &inputs, OpParameter *op_parameter,
int tensor_index, QuantDebugInfo *quant_debug_info, const mindspore::lite::Tensor &tensor);

std::string ParseDataTypeFlagToString(DataTypeFlag data_type_flag);



Loading…
Cancel
Save