Merge pull request !30025 from yeyunpeng2020/dynamic_quant_success
| @@ -31,7 +31,7 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * | |||
| const TensorC *indices = inputs[1]; | |||
| TensorC *output = outputs[0]; | |||
| output->data_type_ = input->data_type_; | |||
| if (parameter->quant_type_ == QuantType_QUANT_WEIGHT) { | |||
| if (parameter->quant_type_ == QuantType_QUANT_WEIGHT || parameter->quant_type_ == QuantType_QUANT_DYNAMIC) { | |||
| output->data_type_ = kNumberTypeFloat32; | |||
| } | |||
| output->format_ = input->format_; | |||
| @@ -0,0 +1,38 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| * | |||
| */ | |||
| #include "nnacl/int8/dynamic_gather_int8.h" | |||
| #include "nnacl/op_base.h" | |||
/* Gathers rows of a per-channel quantized int8 tensor and dequantizes them to
 * float32 on the fly: dst = (src - zp_in[row]) * scale_in[row].
 *
 * input                : int8 source, laid out [outer_size, limit, inner_size]
 * outer_size           : product of dims before the gather axis
 * inner_size           : product of dims after the gather axis
 * limit                : size of the gather axis
 * indices              : rows to gather; negative values count from the end
 * indices_element_size : number of indices
 * output               : float32 destination, [outer_size, indices_element_size, inner_size]
 * scale_in / zp_in     : per-row quantization scale and zero point
 */
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
                   int indices_element_size, float *output, const float *scale_in, const int *zp_in) {
  for (int outer = 0; outer < outer_size; ++outer) {
    const int8_t *src_base = input + outer * limit * inner_size;
    float *dst_base = output + outer * indices_element_size * inner_size;
    for (int idx = 0; idx < indices_element_size; ++idx) {
      int row = indices[idx];
      if (row < 0) {
        row += limit; /* negative index wraps from the end of the axis */
      }
      const float row_scale = scale_in[row];
      const int row_zp = zp_in[row];
      const int8_t *src_row = src_base + row * inner_size;
      float *dst_row = dst_base + idx * inner_size;
      for (int j = 0; j < inner_size; ++j) {
        dst_row[j] = (src_row[j] - row_zp) * row_scale;
      }
    }
  }
}
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
/* Gather + dequantize: for each index, copies one slice of the int8 input and
 * converts it to float32 using that slice's per-channel scale/zero-point.
 * Negative indices are interpreted as counting from the end of the axis. */
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
                   int indices_element_size, float *output, const float *scale_in, const int *zp_in);
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_DYNAMIC_GATHER_INT8_H_ | |||
| @@ -89,6 +89,11 @@ typedef struct GatherQuantArg { | |||
| int zp_out_; | |||
| } GatherQuantArg; | |||
/* Per-channel quantization parameters for the dynamic-quant Gather kernel:
 * one (scale, zero-point) pair per quant channel of the int8 weight.
 * Both arrays are heap-allocated and owned by the kernel that fills them. */
typedef struct DynamicGatherQuantArg {
  float *scale_in_; /* per-channel dequantization scales */
  int *zp_in_;      /* per-channel zero points */
} DynamicGatherQuantArg;
| typedef struct SoftmaxQuantArg { | |||
| QuantArg in_quant_args_; | |||
| QuantArg out_quant_arg_; | |||
| @@ -0,0 +1,215 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/dynamic_gather_int8.h" | |||
| #include <limits> | |||
| #include "nnacl/gather_parameter.h" | |||
| #include "nnacl/int8/dynamic_gather_int8.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::kernel::KERNEL_ARCH; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Gather; | |||
| namespace mindspore::kernel { | |||
// Releases the per-channel quant parameters copied from the weight tensor in
// Prepare(). quant_param_ and its member arrays are malloc'ed, so free() is
// used; members are nulled defensively before the struct itself is released.
DynamicGatherInt8CPUKernel::~DynamicGatherInt8CPUKernel() {
  if (quant_param_ != nullptr) {
    if (quant_param_->zp_in_ != nullptr) {
      free(quant_param_->zp_in_);
      quant_param_->zp_in_ = nullptr;
    }
    if (quant_param_->scale_in_ != nullptr) {
      free(quant_param_->scale_in_);
      quant_param_->scale_in_ = nullptr;
    }
    free(quant_param_);
    quant_param_ = nullptr;
  }
}
// Resolves the gather axis (from const input[2] when present, otherwise from
// GatherParameter) and snapshots the const int8 weight's per-channel quant
// params into quant_param_ for use by DoGather.
// Returns RET_OK on success, RET_ERROR on invalid inputs or allocation failure.
int DynamicGatherInt8CPUKernel::Prepare() {
  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
  CHECK_LESS_RETURN(out_tensors_.size(), 1);
  if (in_tensors_.size() == kInputSize2) {
    // Axis is supplied as a third input tensor; it must already hold data.
    auto axis_data = reinterpret_cast<int *>(in_tensors_.at(C2NUM)->data());
    if (axis_data == nullptr) {
      MS_LOG(ERROR) << "DynamicGatherInt8CPUKernel input[2] data nullptr.";
      return RET_ERROR;
    }
    axis_ = *axis_data;
  } else {
    axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
  }
  auto input_tensor = in_tensors_.at(0);
  // Quant params are copied once here, so the weight must be constant.
  if (!input_tensor->IsConst()) {
    MS_LOG(ERROR) << "Does not support tensor0 is non-const.";
    return RET_ERROR;
  }
  auto in_quant_args = input_tensor->quant_params();
  quant_param_ = reinterpret_cast<DynamicGatherQuantArg *>(malloc(sizeof(DynamicGatherQuantArg)));
  if (quant_param_ == nullptr) {
    MS_LOG(ERROR) << "Malloc DynamicGatherQuantArg for dynamic gather int8 op failed!";
    return RET_ERROR;
  }
  // Zero the struct so the destructor can safely free a partially built state
  // if one of the allocations below fails.
  memset(quant_param_, 0, sizeof(DynamicGatherQuantArg));
  auto channel_num = in_quant_args.size();
  if (channel_num == 0 || channel_num > MAX_MALLOC_SIZE) {
    MS_LOG(ERROR) << "channel_num must large than 0 and less than 2G.";
    return RET_ERROR;
  }
  quant_param_->scale_in_ = reinterpret_cast<float *>(malloc(channel_num * sizeof(float)));
  CHECK_NULL_RETURN(quant_param_->scale_in_);
  quant_param_->zp_in_ = reinterpret_cast<int32_t *>(malloc(channel_num * sizeof(int32_t)));
  CHECK_NULL_RETURN(quant_param_->zp_in_);
  for (size_t i = 0; i < channel_num; ++i) {
    quant_param_->scale_in_[i] = in_quant_args.at(i).scale;
    quant_param_->zp_in_[i] = in_quant_args.at(i).zeroPoint;
  }
  if (!InferShapeDone()) {
    return RET_OK;  // shapes unknown yet; ReSize() runs later
  }
  return ReSize();
}
| int DynamicGatherInt8CPUKernel::ReSize() { | |||
| auto input_tensor = in_tensors_.at(0); | |||
| auto indices_tensor = in_tensors_.at(1); | |||
| auto in_shape = input_tensor->shape(); | |||
| int in_rank = in_shape.size(); | |||
| MS_CHECK_LT(axis_, in_rank, RET_ERROR); | |||
| limit_ = in_shape.at(axis_); | |||
| outer_size_ = 1; | |||
| for (int i = 0; i < axis_; ++i) { | |||
| outer_size_ *= in_shape.at(i); | |||
| } | |||
| inner_size_ = 1; | |||
| for (int i = axis_ + 1; i < in_rank; ++i) { | |||
| inner_size_ *= in_shape.at(i); | |||
| } | |||
| indices_element_size_ = indices_tensor->ElementsNum(); | |||
| return RET_OK; | |||
| } | |||
| int DynamicGatherInt8CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor, | |||
| int limit) { | |||
| if (!isIndicesInt32) { | |||
| if (indices_num >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) { | |||
| MS_LOG(ERROR) << "Input indices_num is invalid, indices_num: " << indices_num; | |||
| return RET_ERROR; | |||
| } | |||
| indices_data_ = reinterpret_cast<int32_t *>(ms_context_->allocator->Malloc(sizeof(int32_t) * indices_num)); | |||
| if (indices_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| return RET_ERROR; | |||
| } | |||
| switch (indices_tensor->data_type()) { | |||
| case kNumberTypeInt64: | |||
| for (int i = 0; i < indices_num; i++) { | |||
| indices_data_[i] = static_cast<int>(reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i]); | |||
| if (indices_data_[i] >= limit) { | |||
| MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| break; | |||
| case kNumberTypeFloat: | |||
| case kNumberTypeFloat32: | |||
| for (int i = 0; i < indices_num; i++) { | |||
| indices_data_[i] = static_cast<int>(reinterpret_cast<float *>(indices_tensor->MutableData())[i]); | |||
| if (indices_data_[i] >= limit) { | |||
| MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Does not support data type: " << indices_tensor->data_type(); | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| indices_data_ = reinterpret_cast<int32_t *>(indices_tensor->MutableData()); | |||
| for (int i = 0; i < limit; ++i) { | |||
| if (indices_data_[i] >= limit) { | |||
| MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " greater or equal to " << limit; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DynamicGatherInt8CPUKernel::DoGather(int task_id) { | |||
| auto input_tensor = in_tensors_.at(0); | |||
| auto indices_tensor = in_tensors_.at(1); | |||
| auto out_tensor = out_tensors_.at(0); | |||
| auto input_ptr = static_cast<int8_t *>(input_tensor->data()); | |||
| CHECK_NULL_RETURN(input_ptr); | |||
| auto output_ptr = static_cast<float *>(out_tensor->data()); | |||
| CHECK_NULL_RETURN(output_ptr); | |||
| int indices_element_size = indices_tensor->ElementsNum(); | |||
| MS_CHECK_GT(indices_element_size, 0, RET_ERROR); | |||
| int stride = UP_DIV(outer_size_, thread_count_); | |||
| int outer_size = MSMIN(stride, outer_size_ - stride * task_id); | |||
| auto thread_stride = stride * task_id; | |||
| input_ptr += thread_stride * inner_size_ * limit_; | |||
| output_ptr += thread_stride * inner_size_ * indices_element_size; | |||
| DynamicGather(input_ptr, outer_size, inner_size_, limit_, indices_data_, indices_element_size_, output_ptr, | |||
| quant_param_->scale_in_, quant_param_->zp_in_); | |||
| return RET_OK; | |||
| } | |||
| int DynamicGather8Run(void *cdata, int task_id, float, float) { | |||
| auto gather_kernel = reinterpret_cast<DynamicGatherInt8CPUKernel *>(cdata); | |||
| auto error_code = gather_kernel->DoGather(task_id); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "GatherRun error task_id[" << task_id << "] error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DynamicGatherInt8CPUKernel::Run() { | |||
| auto indices_tensor = in_tensors_.at(1); | |||
| int indices_num = indices_tensor->ElementsNum(); | |||
| bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32; | |||
| int ret = AssignIndicesData(isIndicesInt32, indices_num, indices_tensor, limit_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "AssignIndicesData failed, error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| int error_code = ParallelLaunch(this->ms_context_, DynamicGather8Run, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| if (!isIndicesInt32) { | |||
| ms_context_->allocator->Free(indices_data_); | |||
| indices_data_ = nullptr; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,53 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_ | |||
| #include <vector> | |||
| #include "nnacl/gather_parameter.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "src/inner_kernel.h" | |||
| namespace mindspore::kernel { | |||
// Gather kernel for dynamically quantized models: gathers slices of an int8
// const weight and dequantizes them to float32 output using per-channel
// (scale, zero-point) pairs captured in Prepare().
class DynamicGatherInt8CPUKernel : public InnerKernel {
 public:
  DynamicGatherInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {}
  ~DynamicGatherInt8CPUKernel() override;

  int Prepare() override;
  int ReSize() override;
  int Run() override;
  // Dequantizing gather for one ParallelLaunch task id.
  int DoGather(int task_id);

 private:
  // Converts int64/float indices to int32 (or uses int32 in place) and
  // validates them against `limit`.
  int AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor, int limit);

 private:
  int thread_count_ = 0;
  int inner_size_ = 0;            // product of dims after the gather axis
  int limit_ = 0;                 // length of the gather axis
  int outer_size_ = 0;            // product of dims before the gather axis
  int axis_ = 0;                  // gather axis
  int indices_element_size_ = 0;  // number of indices, set in ReSize()
  int *indices_data_ = nullptr;   // int32 indices; owned only when converted
  DynamicGatherQuantArg *quant_param_ = nullptr;  // owned; released in dtor
};
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DYNAMIC_GATHER_INT8_H_ | |||
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/gather_int8.h" | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/int8/dynamic_gather_int8.h" | |||
| #include "nnacl/gather_parameter.h" | |||
| #include "nnacl/int8/gather_int8.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| @@ -141,5 +142,38 @@ int GatherInt8CPUKernel::Run() { | |||
| return RET_OK; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Gather, LiteKernelCreator<GatherInt8CPUKernel>) | |||
| kernel::InnerKernel *GatherInt8CPUKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *parameter, | |||
| const lite::Context *ctx, const kernel::KernelKey &desc) { | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "parameter is nullptr."; | |||
| return nullptr; | |||
| } | |||
| InnerKernel *kernel = nullptr; | |||
| if (parameter->quant_type_ == schema::QuantType_QUANT_ALL) { | |||
| kernel = | |||
| new (std::nothrow) GatherInt8CPUKernel(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||
| } else if (parameter->quant_type_ == schema::QuantType_QUANT_DYNAMIC) { | |||
| const int axis_index = 2; | |||
| if (inputs.size() > axis_index + 1 && inputs.at(axis_index)) { | |||
| MS_LOG(ERROR) << "kernel: " << parameter->name_ << " is unsupported Axis is not const."; | |||
| return nullptr; | |||
| } | |||
| kernel = new (std::nothrow) | |||
| DynamicGatherInt8CPUKernel(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx)); | |||
| } else { | |||
| MS_LOG(ERROR) << "kernel: " << parameter->name_ << " is unsupported quant type:" << parameter->quant_type_; | |||
| free(parameter); | |||
| return nullptr; | |||
| } | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr."; | |||
| free(parameter); | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Gather, GatherInt8CPUKernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -349,12 +349,19 @@ void TrainExport::PrepareRemap(int offset) { | |||
| int TrainExport::ExportTensor(const Model *model, const std::vector<mindspore::lite::Tensor *> &tensors, int offset, | |||
| const std::vector<std::pair<size_t, tensor_info>> &map_index, | |||
| const std::vector<std::string> &output_names, const std::set<size_t> &out_set) { | |||
| std::vector<mindspore::lite::Tensor *> in_tensors; | |||
| for (auto index : map_index) { | |||
| auto id = index.first; | |||
| size_t pid = id - static_cast<size_t>(offset); | |||
| mindspore::lite::Tensor *tensor = tensors.at(pid); | |||
| in_tensors.push_back(tensor); | |||
| } | |||
| for (auto index : map_index) { | |||
| auto id = index.first; | |||
| size_t pid = id - static_cast<size_t>(offset); | |||
| mindspore::lite::Tensor *tensor = tensors.at(pid); | |||
| schema::Tensor *scTensor = model->all_tensors_.at(pid); | |||
| auto preferred_dim = WeightDecoder::GetPreferredDim(index.second.op_parameter, index.second.input_index, | |||
| auto preferred_dim = WeightDecoder::GetPreferredDim(in_tensors, index.second.op_parameter, index.second.input_index, | |||
| tensor->shape(), model->version_); | |||
| auto tensorT = CreateTensor(tensor, scTensor, preferred_dim); | |||
| if (tensorT == nullptr) { | |||
| @@ -19,7 +19,6 @@ | |||
| #include "src/huffman_decode.h" | |||
| #include "tools/converter/quantizer/fse_decoder.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| #include "nnacl/gather_parameter.h" | |||
| namespace mindspore::lite { | |||
| namespace { | |||
| @@ -365,7 +364,7 @@ int WeightDecoder::DequantNode(OpParameter *op_parameter, const std::vector<Tens | |||
| int index = 0; | |||
| for (auto &tensor : in_tensors) { | |||
| MS_CHECK_TRUE_RET(tensor != nullptr, RET_ERROR); | |||
| auto preferred_dim = GetPreferredDim(op_parameter, index++, tensor->shape(), model_version); | |||
| auto preferred_dim = GetPreferredDim(in_tensors, op_parameter, index++, tensor->shape(), model_version); | |||
| auto ret = WeightDecoder::DequantTensor(tensor, preferred_dim, dst_data_type); | |||
| if (ret != RET_OK && ret != RET_NO_CHANGE) { | |||
| MS_LOG(DEBUG) << "Dequant tensor failed"; | |||
| @@ -431,13 +430,7 @@ int WeightDecoder::GetDeConvPreferredDim(const OpParameter *op_parameter, const | |||
| } | |||
| } | |||
| int WeightDecoder::GetGatherPreferredDim(const OpParameter *op_parameter) { | |||
| MS_ASSERT(op_parameter != nullptr); | |||
| const auto *param = reinterpret_cast<const GatherParameter *>(op_parameter); | |||
| return param->axis_; | |||
| } | |||
| bool IsChannelFirst(int index, const OpParameter *op_parameter) { | |||
| bool WeightDecoder::IsChannelFirst(int index, const OpParameter *op_parameter) { | |||
| MS_ASSERT(op_parameter != nullptr); | |||
| if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) { | |||
| const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter); | |||
| @@ -450,24 +443,6 @@ bool IsChannelFirst(int index, const OpParameter *op_parameter) { | |||
| return true; | |||
| } | |||
| int WeightDecoder::GetPreferredDim(const OpParameter *op_parameter, int index, const std::vector<int> &dims, | |||
| const std::string &model_version) { | |||
| const int first_version_offset = 5; | |||
| if (model_version.empty() || | |||
| model_version.substr(model_version.size() - first_version_offset, model_version.size()) < "1.6.0") { | |||
| return IsChannelFirst(index, op_parameter) ? 0 : 1; | |||
| } | |||
| if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) { | |||
| return GetMatMulPreferredDim(op_parameter, index, dims); | |||
| } else if (op_parameter->type_ == schema::PrimitiveType_Conv2dTransposeFusion) { | |||
| return 0; | |||
| } else if (op_parameter->type_ == schema::PrimitiveType_Gather) { | |||
| return GetGatherPreferredDim(op_parameter); | |||
| } | |||
| // The first index. | |||
| return 0; | |||
| } | |||
| bool NeedBitUppackCheck(const SchemaTensorWrapper &src_tensor) { | |||
| MS_ASSERT(src_tensor.handler() != nullptr); | |||
| MS_ASSERT(src_tensor.data() != nullptr); | |||
| @@ -25,6 +25,7 @@ | |||
| #include <string> | |||
| #include <cmath> | |||
| #include "nnacl/matmul_parameter.h" | |||
| #include "nnacl/gather_parameter.h" | |||
| #include "src/lite_kernel.h" | |||
| #include "src/common/utils.h" | |||
| #include "src/tensor.h" | |||
| @@ -137,8 +138,24 @@ class WeightDecoder { | |||
| static int UnPack(const SchemaTensorWrapper &src_tensor, lite::Tensor *dst_tensor); | |||
| static int GetPreferredDim(const OpParameter *op_parameter, int index, const std::vector<int> &dims, | |||
| const std::string &model_version); | |||
| template <typename T> | |||
| static int GetPreferredDim(const std::vector<T *> &in_tensors, const OpParameter *op_parameter, int index, | |||
| const std::vector<int> &dims, const std::string &model_version) { | |||
| const int first_version_offset = 5; | |||
| if (model_version.empty() || | |||
| model_version.substr(model_version.size() - first_version_offset, model_version.size()) < "1.6.0") { | |||
| return IsChannelFirst(index, op_parameter) ? 0 : 1; | |||
| } | |||
| if (op_parameter->type_ == schema::PrimitiveType_MatMulFusion) { | |||
| return GetMatMulPreferredDim(op_parameter, index, dims); | |||
| } else if (op_parameter->type_ == schema::PrimitiveType_Conv2dTransposeFusion) { | |||
| return 0; | |||
| } else if (op_parameter->type_ == schema::PrimitiveType_Gather) { | |||
| return GetGatherPreferredDim(op_parameter, in_tensors); | |||
| } | |||
| // The first index. | |||
| return 0; | |||
| } | |||
| template <typename ST, typename DT = float> | |||
| static DT *DequantData(const lite::Tensor *input_tensor, int preferred_dim) { | |||
| @@ -164,6 +181,8 @@ class WeightDecoder { | |||
| static int DecodeHuffmanCode(const SchemaTensorWrapper &src_tensor, lite::Tensor *dst_tensor); | |||
| static bool IsChannelFirst(int index, const OpParameter *op_parameter); | |||
| template <typename ST, typename DT = float> | |||
| static DT *DequantPerLayerData(const lite::Tensor *input_tensor, const ST *quant_datas) { | |||
| auto quant_param = input_tensor->quant_params(); | |||
| @@ -244,7 +263,23 @@ class WeightDecoder { | |||
| static int GetMatMulPreferredDim(const OpParameter *op_parameter, int input_index, const std::vector<int> &dims); | |||
| static int GetDeConvPreferredDim(const OpParameter *op_parameter, const std::vector<int> &dims); | |||
| static int GetGatherPreferredDim(const OpParameter *op_parameter); | |||
| template <typename T> | |||
| static int GetGatherPreferredDim(const OpParameter *op_parameter, const std::vector<T *> &in_tensors) { | |||
| MS_ASSERT(op_parameter != nullptr); | |||
| const int axis_index = 2; | |||
| const int axis_tensor_size = 3; | |||
| if (in_tensors.size() == axis_tensor_size && in_tensors.at(axis_index)->IsConst()) { | |||
| if (in_tensors.at(axis_index)->data_type() == kNumberTypeInt32) { | |||
| return static_cast<int *>(in_tensors.at(axis_index)->data())[0]; | |||
| } else if (in_tensors.at(axis_index)->data_type() == kNumberTypeInt64) { | |||
| return static_cast<int64_t *>(in_tensors.at(axis_index)->data())[0]; | |||
| } | |||
| } | |||
| const auto *param = reinterpret_cast<const GatherParameter *>(op_parameter); | |||
| return param->axis_; | |||
| } | |||
| static int DequantWeight(lite::Tensor *input_tensor, int preferred_dim, TypeId dst_data_type = kNumberTypeFloat32); | |||
| template <typename T1, typename T2> | |||
| @@ -253,13 +288,14 @@ class WeightDecoder { | |||
| T2 uint_result = 0; | |||
| T1 result; | |||
| UnPackFromUintToOrigin<T2>(packed_data, unpack_bit_data); | |||
| const int base = 2; | |||
| while (static_cast<int>(unpack_bit_data->size()) >= origin_bit) { | |||
| for (int k = 0; k < origin_bit; k++) { | |||
| bool bit_tmp = unpack_bit_data->front(); | |||
| uint_result = (static_cast<size_t>(bit_tmp) << static_cast<unsigned int>(k)) + uint_result; | |||
| unpack_bit_data->pop(); | |||
| } | |||
| result = uint_result - static_cast<T2>(pow(2, origin_bit - 1)); | |||
| result = uint_result - static_cast<T2>(pow(base, origin_bit - 1)); | |||
| (static_cast<T1 *>(unpack_int))[*count] = result; | |||
| uint_result = 0; | |||
| (*count)++; | |||
| @@ -271,7 +307,7 @@ class WeightDecoder { | |||
| uint_result = (static_cast<unsigned int>(bit) << i) + uint_result; | |||
| unpack_bit_data->pop(); | |||
| } | |||
| result = static_cast<T1>(uint_result - static_cast<T2>(pow(2, origin_bit - 1))); | |||
| result = static_cast<T1>(uint_result - static_cast<T2>(pow(base, origin_bit - 1))); | |||
| (static_cast<T1 *>(unpack_int))[*count] = result; | |||
| } | |||
| } | |||
| @@ -193,10 +193,11 @@ int DebugInfoManager::SetOriginStaticInfo(QuantDebugInfo *quant_debug_info, cons | |||
| return RET_OK; | |||
| } | |||
| int DebugInfoManager::SetQuantStaticInfo(OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info, | |||
| int DebugInfoManager::SetQuantStaticInfo(const std::vector<mindspore::tensor::MSTensor *> &inputs, | |||
| OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info, | |||
| const mindspore::lite::Tensor &tensor) { | |||
| auto preferred_dim = | |||
| mindspore::lite::WeightDecoder::GetPreferredDim(op_parameter, tensor_index, tensor.shape(), Version()); | |||
| mindspore::lite::WeightDecoder::GetPreferredDim(inputs, op_parameter, tensor_index, tensor.shape(), Version()); | |||
| float *quant_data; | |||
| if (tensor.data_type() == kNumberTypeInt8) { | |||
| quant_data = mindspore::lite::WeightDecoder::DequantData<int8_t, float>(&tensor, preferred_dim); | |||
| @@ -266,8 +267,10 @@ int DebugInfoManager::AddOriginInfo(const mindspore::CallBackParam &call_back_pa | |||
| return RET_OK; | |||
| } | |||
| int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, | |||
| bool is_input, int tensor_index, mindspore::lite::Tensor *compared_tensor) { | |||
| int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_param, | |||
| const std::vector<mindspore::tensor::MSTensor *> &inputs, | |||
| OpParameter *op_parameter, bool is_input, int tensor_index, | |||
| mindspore::lite::Tensor *compared_tensor) { | |||
| CHECK_NULL_RETURN(op_parameter); | |||
| CHECK_NULL_RETURN(compared_tensor); | |||
| QuantDebugInfo compared_debug_info; | |||
| @@ -280,7 +283,7 @@ int DebugInfoManager::AddComparedInfo(const mindspore::CallBackParam &call_back_ | |||
| auto is_const = compared_tensor->category() == CONST_TENSOR || compared_tensor->category() == CONST_SCALAR; | |||
| compared_debug_info.tensor_type_flag = is_const ? WEIGHT : ACTIVATION; | |||
| if (!compared_tensor->quant_params().empty()) { | |||
| auto ret = SetQuantStaticInfo(op_parameter, tensor_index, &compared_debug_info, *compared_tensor); | |||
| auto ret = SetQuantStaticInfo(inputs, op_parameter, tensor_index, &compared_debug_info, *compared_tensor); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << compared_tensor->tensor_name() << " get quant static info failed."; | |||
| return RET_ERROR; | |||
| @@ -435,13 +438,13 @@ KernelCallBack DebugInfoManager::GetQuantBeforeCallBack( | |||
| MS_LOG(ERROR) << tensor->tensor_name() << " get const tensor failed."; | |||
| return false; | |||
| } | |||
| ret = AddComparedInfo(call_param, op_parameters.at(call_param.node_name), true, i, &new_tensor); | |||
| ret = AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), true, i, &new_tensor); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << tensor->tensor_name() << " add compared info failed."; | |||
| return false; | |||
| } | |||
| } else { | |||
| auto ret = AddComparedInfo(call_param, op_parameters.at(call_param.node_name), true, i, | |||
| auto ret = AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), true, i, | |||
| static_cast<mindspore::lite::Tensor *>(tensor)); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << tensor->tensor_name() << " add compared info failed."; | |||
| @@ -494,7 +497,7 @@ KernelCallBack DebugInfoManager::GetAfterCallBack(const std::map<std::string, Op | |||
| // all outputs are same dtype. | |||
| for (size_t i = 0; i < outputs.size(); ++i) { | |||
| auto tensor = outputs.at(i); | |||
| AddComparedInfo(call_param, op_parameters.at(call_param.node_name), false, i, | |||
| AddComparedInfo(call_param, inputs, op_parameters.at(call_param.node_name), false, i, | |||
| static_cast<mindspore::lite::Tensor *>(tensor)); | |||
| } | |||
| return true; | |||
| @@ -91,8 +91,9 @@ class DebugInfoManager { | |||
| int AddOriginInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, bool is_input, | |||
| int tensor_index, mindspore::lite::Tensor *origin_tensor); | |||
| int AddComparedInfo(const mindspore::CallBackParam &call_back_param, OpParameter *op_parameter, bool is_input, | |||
| int tensor_index, mindspore::lite::Tensor *compared_tensor); | |||
| int AddComparedInfo(const mindspore::CallBackParam &call_back_param, | |||
| const std::vector<mindspore::tensor::MSTensor *> &inputs, OpParameter *op_parameter, | |||
| bool is_input, int tensor_index, mindspore::lite::Tensor *compared_tensor); | |||
| void PrintAllDebugInfo(); | |||
| @@ -100,8 +101,8 @@ class DebugInfoManager { | |||
| int SetOriginStaticInfo(QuantDebugInfo *quant_debug_info, const mindspore::lite::Tensor &tensor); | |||
| int SetQuantStaticInfo(OpParameter *op_parameter, int tensor_index, QuantDebugInfo *quant_debug_info, | |||
| const mindspore::lite::Tensor &tensor); | |||
| int SetQuantStaticInfo(const std::vector<mindspore::tensor::MSTensor *> &inputs, OpParameter *op_parameter, | |||
| int tensor_index, QuantDebugInfo *quant_debug_info, const mindspore::lite::Tensor &tensor); | |||
| std::string ParseDataTypeFlagToString(DataTypeFlag data_type_flag); | |||