Merge pull request !6042 from wangchangkai/master
@@ -163,4 +163,51 @@ void LiteKernelUtil::InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels) {
 }
 int LiteKernelUtil::SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs) { return -1; }
+float *LiteKernelUtil::DequantWeight(lite::Tensor *input_tensor) {
+  MS_ASSERT(input_tensor != nullptr);
+  if (input_tensor->data_type() != kNumberTypeInt8) {
+    MS_LOG(ERROR) << "conv weight input type error " << input_tensor->data_type();
+    return nullptr;
+  }
+  if (input_tensor->GetQuantParams().empty()) {
+    MS_LOG(ERROR) << "no quant param";
+    return nullptr;
+  }
+  const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData());
+  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
+  if (dequant_data == nullptr) {
+    MS_LOG(ERROR) << "malloc failed";
+    return nullptr;
+  }
+  if (input_tensor->GetQuantParams().size() != kPerTensor) {
+    size_t channels = static_cast<size_t>(input_tensor->Batch());
+    if (input_tensor->GetQuantParams().size() != channels) {
+      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << " " << channels;
+      free(dequant_data);
+      return nullptr;
+    }
+    size_t per_channel_size = input_tensor->ElementsNum() / channels;
+    auto quant_param = input_tensor->GetQuantParams();
+    for (size_t i = 0; i < channels; i++) {
+      auto param = quant_param.at(i);
+      auto scale = param.scale;
+      auto zero_point = param.zeroPoint;
+      for (size_t j = 0; j < per_channel_size; j++) {
+        dequant_data[per_channel_size * i + j] =
+          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
+      }
+    }
+  } else {
+    auto quant_param = input_tensor->GetQuantParams();
+    auto param = quant_param.front();
+    auto scale = param.scale;
+    auto zero_point = param.zeroPoint;
+    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
+      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
+    }
+  }
+  return dequant_data;
+}
 }  // namespace mindspore::kernel
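The new DequantWeight helper applies the standard affine dequantization rule, real_value = scale * (quantized_value - zero_point), with one {scale, zeroPoint} pair for the whole tensor (kPerTensor) or one pair per output channel. A minimal standalone sketch of that rule, assuming a flat channel-major weight layout (QuantParam and Dequant below are illustrative names, not MindSpore Lite API):

#include <cstdint>
#include <cstdio>
#include <vector>

struct QuantParam {
  float scale;
  int zero_point;
};

// One QuantParam means per-tensor mode; N params mean N channels of
// equal size laid out contiguously.
std::vector<float> Dequant(const std::vector<int8_t> &q, const std::vector<QuantParam> &params) {
  std::vector<float> out(q.size());
  const size_t per_channel = q.size() / params.size();  // assumes exact division
  for (size_t i = 0; i < params.size(); ++i) {
    for (size_t j = 0; j < per_channel; ++j) {
      const size_t idx = i * per_channel + j;
      out[idx] = params[i].scale * static_cast<float>(q[idx] - params[i].zero_point);
    }
  }
  return out;
}

int main() {
  // Two channels, two weights each, with different per-channel quant params.
  std::vector<int8_t> q = {10, -10, 10, -10};
  std::vector<QuantParam> params = {{0.5f, 0}, {0.1f, 2}};
  for (float v : Dequant(q, params)) printf("%g ", v);  // prints: 5 -5 0.8 -1.2
  return 0;
}

Per-channel mode relies on the channel count dividing ElementsNum() exactly, which is why DequantWeight validates the quant-param count against Batch() before indexing.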
@@ -28,6 +28,8 @@
 #include "src/tensor.h"
 #include "include/errorcode.h"
+static constexpr int kPerTensor = 1;
 // using mindspore::kernel::AddressPtr;
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -202,6 +204,8 @@ class LiteKernelUtil {
   static void InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels);
   static int SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs);
+  static float *DequantWeight(lite::Tensor *input_tensor);
 };
 }  // namespace mindspore::kernel
@@ -657,8 +657,8 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
       return new PowerGrad(primitive);
     case schema::PrimitiveType_BNGradInput:
       return new BNGradInput(primitive);
-    case schema::PrimitiveType_SoftmaxCrossEntroy:
-      return new SoftmaxCrossEntroy(primitive);
+    case schema::PrimitiveType_SoftmaxCrossEntropy:
+      return new SoftmaxCrossEntropy(primitive);
     case schema::PrimitiveType_Depend:
       return new Depend(primitive);
     case schema::PrimitiveType_FlattenGrad:
@@ -324,51 +324,4 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
                                 &conv_param_->conv_quant_arg_.out_act_max_[0]);
   return RET_OK;
 }
-int ConvolutionBaseCPUKernel::RestoreFilter(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "conv weight input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      free(dequant_data);
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 }  // namespace mindspore::kernel
@@ -32,7 +32,6 @@
 using mindspore::lite::Context;
 using mindspore::schema::PadMode;
 using mindspore::schema::QuantType;
-static constexpr int kPerTensor = 1;
 namespace mindspore::kernel {
 class ConvolutionBaseCPUKernel : public LiteKernel {
@@ -60,7 +59,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
   int SetQuantMultiplier();
   int CheckResizeValid();
   void FreeQuantParam();
-  static int RestoreFilter(lite::Tensor *input_tensor);
 protected:
   int tile_num_;
@@ -53,56 +53,6 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,
   }
   return kernel;
 }
-int RestoreFullconnectWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeInt8) {
-    MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData());
-  if (quant_data == nullptr) {
-    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
-    return RET_ERROR;
-  }
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                        const std::vector<lite::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,
@@ -114,11 +64,20 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
   // data of second tensor of fc may be nullptr
   auto *restore_data = weight_tensor->data_c();
   if (!weight_tensor->GetQuantParams().empty()) {
-    RestoreFullconnectWeight(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
   auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (!kernel) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (!weight_tensor->GetQuantParams().empty()) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -126,6 +85,10 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (!weight_tensor->GetQuantParams().empty()) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   if (!weight_tensor->GetQuantParams().empty()) {
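Each creator now follows the same swap-and-restore discipline: stash the original data pointer, point the tensor at the malloc'ed float buffer while the kernel is created and Init() runs, then free that buffer and restore the original pointer on the failure paths as well as after success. A hedged RAII sketch of the same idea (the Tensor type below is a stand-in, not lite::Tensor):

#include <cstdlib>

// Stand-in for lite::Tensor: a raw data pointer with the two calls used below.
struct Tensor {
  void *data = nullptr;
  void SetData(void *d) { data = d; }
  void FreeData() { free(data); data = nullptr; }
};

// Swaps a dequantized buffer into the tensor for the lifetime of the guard,
// then frees it and restores the original (quantized) pointer on scope exit,
// mirroring the FreeData()/SetData(restore_data) pairs added in this patch.
class WeightSwapGuard {
 public:
  WeightSwapGuard(Tensor *t, void *dequant) : t_(t), restore_(t->data) { t_->SetData(dequant); }
  ~WeightSwapGuard() {
    t_->FreeData();
    t_->SetData(restore_);
  }

 private:
  Tensor *t_;
  void *restore_;
};

int main() {
  Tensor weight;
  weight.SetData(malloc(4));  // pretend these are the int8 quantized weights
  void *quantized = weight.data;
  {
    WeightSwapGuard guard(&weight, malloc(16));  // float buffer lives only in this scope
    // ... create and Init() the kernel here ...
  }
  bool restored = (weight.data == quantized);  // true: original pointer is back
  free(weight.data);
  return restored ? 0 : 1;
}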
@@ -23,7 +23,6 @@
 #include "nnacl/matmul_parameter.h"
 using mindspore::lite::Context;
-static constexpr int kPerTensor = 1;
 namespace mindspore::kernel {
 class FullconnectionBaseCPUKernel : public LiteKernel {
@@ -26,56 +26,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_MatMul;
 namespace mindspore::kernel {
-int RestoreMatmulWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  if (quant_data == nullptr) {
-    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
-    return RET_ERROR;
-  }
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                            const lite::Context *ctx, const kernel::KernelKey &desc,
@@ -89,8 +39,13 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &inputs,
     MS_LOG(ERROR) << "weight_tensor MutableData is nullptr.";
     return nullptr;
   }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    RestoreMatmulWeight(inputs.at(kWeightIndex));
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
   auto input_tensor = inputs.at(kInputIndex);
@@ -103,6 +58,10 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &inputs,
   }
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -110,10 +69,14 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &inputs,
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
     weight_tensor->FreeData();
     weight_tensor->SetData(restore_data);
   }
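The guard itself is also broadened here: previously only primitive->GetQuantType() == schema::QuantType_WeightQuant triggered the dequantize-and-restore path, while now an int8 weight dtype does too, so weight-quantized tensors are caught even when the quant-type flag was not propagated onto the primitive. The repeated condition reads as a predicate like this (hypothetical helper with stand-in enums, not part of the patch):

// Hypothetical predicate mirroring the condition repeated in the kernel
// creators; the enums are stand-ins for the MindSpore Lite/schema types.
enum TypeId { kNumberTypeFloat32, kNumberTypeInt8 };
enum QuantType { QuantType_QUANT_NONE, QuantType_WeightQuant };

bool NeedsWeightDequant(TypeId weight_dtype, QuantType quant_type) {
  return weight_dtype == kNumberTypeInt8 || quant_type == QuantType_WeightQuant;
}

int main() {
  // Either signal alone is enough to trigger dequantize-then-restore.
  return (NeedsWeightDequant(kNumberTypeInt8, QuantType_QUANT_NONE) &&
          NeedsWeightDequant(kNumberTypeFloat32, QuantType_WeightQuant)) ? 0 : 1;
}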
@@ -23,7 +23,6 @@
 #include "nnacl/matmul_parameter.h"
 using mindspore::lite::Context;
-static constexpr int kPerTensor = 1;
 namespace mindspore::kernel {
 class MatmulBaseCPUKernel : public LiteKernel {
@@ -69,52 +69,6 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
   }
   return RET_OK;
 }
-int RestoreMulWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 int ArithmeticSelfCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
@@ -37,7 +37,6 @@ using mindspore::schema::PrimitiveType_Sin;
 using mindspore::schema::PrimitiveType_Sqrt;
 using mindspore::schema::PrimitiveType_Square;
 using mindspore::schema::PrimitiveType_Neg;
-static constexpr int kPerTensor = 1;
 namespace mindspore::kernel {
 class ArithmeticSelfCPUKernel : public LiteKernel {
@@ -235,7 +235,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
   auto *weight_tensor = inputs.at(kWeightIndex);
   auto *restore_data = weight_tensor->MutableData();
   if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
   kernel::LiteKernel *kernel;
@@ -253,6 +258,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
   }
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -260,6 +269,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
@@ -134,7 +134,12 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
   auto *weight_tensor = inputs.at(kWeightIndex);
   auto *restore_data = weight_tensor->MutableData();
   if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
@@ -146,6 +151,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
   }
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -153,6 +162,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
@@ -169,69 +169,32 @@ int ScaleCPUKernel::Run() {
   }
   return RET_OK;
 }
-int RestoreScaleWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                               const lite::Context *ctx, const kernel::KernelKey &desc,
                                               const mindspore::lite::PrimitiveC *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_Scale);
-  auto *weight_tensor = inputs.at(kWeightIndex);
-  auto *restore_data = weight_tensor->MutableData();
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    RestoreScaleWeight(inputs.at(kWeightIndex));
-  }
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "opParameter is nullptr";
     return nullptr;
   }
+  auto *weight_tensor = inputs.at(kWeightIndex);
+  auto *restore_data = weight_tensor->MutableData();
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
+  }
   auto *kernel = new (std::nothrow) ScaleCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "New kernel fails.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
@@ -240,9 +203,13 @@ kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
     delete kernel;
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
     weight_tensor->FreeData();
     weight_tensor->SetData(restore_data);
   }
@@ -21,7 +21,6 @@
 #include "src/lite_kernel.h"
 #include "nnacl/fp32/scale.h"
-static constexpr int kPerTensor = 1;
 namespace mindspore::kernel {
 class ScaleCPUKernel : public LiteKernel {
@@ -65,7 +65,7 @@ int ApplyMomentumCPUKernel::Init() {
   // Only for test with uninitialized Data
   size_t elem_num = in_tensors_[0]->ElementsNum();
   auto accumulate = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
-  for (int i = 0; i < elem_num; i++) accumulate[i] = 0.0;
+  for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0;
   workspace = new float[elem_num];
   return 0;
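elem_num is a size_t here, so the loop index moves from int to size_t: comparing a signed int against an unsigned size_t promotes the int and triggers -Wsign-compare, and can loop incorrectly for element counts beyond INT_MAX. A minimal illustration of the fix:

#include <cstddef>

int main() {
  const size_t elem_num = 4;
  float accumulate[4];
  // for (int i = 0; i < elem_num; i++)   // signed/unsigned comparison warning
  for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0f;
  return accumulate[0] == 0.0f ? 0 : 1;
}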
@@ -0,0 +1 @@
+ml_face_openclose.tflite
@@ -127,6 +127,27 @@ function Run_x86() {
             return 1
         fi
     done < ${models_mindspore_config}
+
+    # Run tflite weight quantization converted models:
+    while read line; do
+        model_name=${line}
+        if [[ $model_name == \#* ]]; then
+            continue
+        fi
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd '${convertor_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_benchmark_log_file}"
+        cd ${convertor_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1' >> "${run_benchmark_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --warmUpLoopCount=1 --loopCount=1 >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='x86: '${model_name}'_weightquant pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='x86: '${model_name}'_weightquant failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+    done < ${models_tflite_weightquant_config}
 }
 # Run on arm64 platform:
@@ -432,6 +453,7 @@ models_tflite_config=${basepath}/models_tflite.cfg
 models_caffe_config=${basepath}/models_caffe.cfg
 models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg
 models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg
+models_tflite_weightquant_config=${basepath}/models_tflite_weightquant.cfg
 models_onnx_config=${basepath}/models_onnx.cfg
 models_fp16_config=${basepath}/models_fp16.cfg
 models_mindspore_config=${basepath}/models_mindspore.cfg
@@ -522,6 +544,17 @@ while read line; do
     cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms
 done < ${models_fp16_config}
+
+# Convert weightquant models:
+while read line; do
+    model_name=${line}
+    if [[ $model_name == \#* ]]; then
+        continue
+    fi
+    echo ${model_name} >> "${run_benchmark_log_file}"
+    echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16' >> "${run_benchmark_log_file}"
+    ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16 || Convert_status=$?
+done < ${models_tflite_weightquant_config}
 # Check all result and return value
 if [[ ${Convert_status} = 0 ]];then
     echo "convert is ended"
@@ -72,7 +72,7 @@ ValueNodePtr AnfImporterFromMetaGraphT::ConvertPrimitive(const std::unique_ptr<schema::CNodeT> &cNode) {
   auto primitiveCValue = PrimitiveC::Create(cNode->primitive.release());
   cNode->primitive = nullptr;
   // add quant parameter
-  if (cNode->quantType != schema::QuantType_PostTraining) {
+  if (cNode->quantType != schema::QuantType_PostTraining && cNode->quantType != schema::QuantType_WeightQuant) {
     primitiveCValue->SetQuantType(cNode->quantType);
     for (int index : cNode->inputIndex) {
       if (meta_graph_->allTensors[index]->quantParams.size() > 0) {
@@ -64,10 +64,15 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const converter::Flags *config) {
       return nullptr;
     }
   } else if (config->quantType == schema::QuantType_WeightQuant) {
+    auto bitNum = static_cast<size_t>(std::stoull(config->bitNum));
+    if (bitNum != quant::UINT8_QUANTIZATION) {
+      MS_LOG(ERROR) << "Currently only 8 bit weight quant is supported";
+      return nullptr;
+    }
     this->mQuantizer = std::make_unique<quant::WeightQuantizer>(
       new_graph, config->quantSize, config->convWeightQuantChannelThreshold, config->bitNum);
     if (mQuantizer == nullptr) {
-      MS_LOG(ERROR) << "New PostTrainingQuantizer failed";
+      MS_LOG(ERROR) << "New WeightQuantizer failed";
       return nullptr;
     }
   }
@@ -31,7 +31,7 @@ Flags::Flags() {
           "Input model weight file path. Needed when fmk is CAFFE. CAFFE: *.caffemodel", "");
   AddFlag(&Flags::inferenceTypeIn, "inferenceType",
           "Real data type saved in output file, reserved param, NOT used for now. FLOAT | INT8", "FLOAT");
-  AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining", "");
+  AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining | WeightQuant", "");
   AddFlag(&Flags::inputInferenceTypeIn, "inputInferenceType", "Input inference data type. FLOAT | INT8", "FLOAT");
   AddFlag(&Flags::stdDev, "stdDev", "Standard deviation value for aware-quantization", "128");
   AddFlag(&Flags::mean, "mean", "Mean value for aware-quantization", "-0.5");
@@ -86,6 +86,7 @@ STATUS WeightFormatHardCodePass::HardCodeCAFFE(const std::unique_ptr<CNodeT> &node,
   MS_ASSERT(node->primitive != nullptr);
   auto opType = node->primitive->value.type;
   switch (this->quantType) {
+    case QuantType_WeightQuant:
    case QuantType_QUANT_NONE: {
      if (opType == schema::PrimitiveType_Conv2D || opType == schema::PrimitiveType_DepthwiseConv2D ||
          opType == schema::PrimitiveType_DeConv2D || opType == schema::PrimitiveType_DeDepthwiseConv2D) {
@@ -123,6 +124,7 @@ STATUS WeightFormatHardCodePass::HardCodeONNX(const std::unique_ptr<CNodeT> &node,
        return RET_ERROR;
      }
    } break;
+    case QuantType_WeightQuant:
    case QuantType_QUANT_NONE: {
      // conv (K x C/group x kH x kW) group = 1
      // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W)
@@ -162,6 +164,7 @@ STATUS WeightFormatHardCodePass::HardCodeMS(const std::unique_ptr<CNodeT> &node,
        weightTensor->format = schema::Format::Format_KCHW;
      }
    } break;
+    case QuantType_WeightQuant:
    case QuantType_QUANT_NONE: {
      // sum up from current ms quant models
      if (opType == PrimitiveType_Conv2D) {
@@ -66,13 +66,14 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
     ParamValueLitePtr param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param());
     auto status =
-      QuantFilter<uint8_t>(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, depthwise);
+      QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant,
+                          quant_max, quant_min, bitNum, true, depthwise);
     if (status != RET_OK) {
       MS_LOG(ERROR) << "QuantFilter failed : " << status;
       return status;
     }
     // set dtype
-    param_value->set_tensor_type(kNumberTypeUInt8);
+    param_value->set_tensor_type(kNumberTypeInt8);
     auto abstractBase = param_node->abstract();
     if (abstractBase == nullptr) {
       MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name();
@@ -83,7 +84,7 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
       return RET_ERROR;
     }
     auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
-    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8));
+    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8));
     primitive_c->SetQuantType(schema::QuantType_WeightQuant);
   }
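The quantizer thus moves from unsigned asymmetric uint8 (hard-coded range 0..255) to signed quantization driven by the new quant_max/quant_min members (INT8_MAX/INT8_MIN by default, see the header change below), and the stored tensor dtype changes to kNumberTypeInt8 to match. A hedged sketch of the affine quantization step QuantFilter presumably performs per weight (QuantizeValue is an illustrative name, not the converter's API):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative affine quantization into a signed 8-bit range, assuming
// scale/zero_point were derived from the tensor's min/max beforehand.
int8_t QuantizeValue(float real, float scale, int zero_point,
                     int quant_min = INT8_MIN, int quant_max = INT8_MAX) {
  int q = static_cast<int>(std::round(real / scale)) + zero_point;
  q = std::min(quant_max, std::max(quant_min, q));  // clamp to the target range
  return static_cast<int8_t>(q);
}

int main() {
  // scale 0.05, zero point 0: 1.0f -> 20; out-of-range values clamp to 127/-128.
  printf("%d %d %d\n", QuantizeValue(1.0f, 0.05f, 0), QuantizeValue(100.0f, 0.05f, 0),
         QuantizeValue(-100.0f, 0.05f, 0));  // prints: 20 127 -128
  return 0;
}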
@@ -128,12 +129,13 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {
       std::vector<schema::QuantParamT> quant_params;
       primitive_c->AddInputQuantParam(quant_params);
-      auto status = QuantFilter<uint8_t>(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, false);
+      auto status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant,
+                                        quant_max, quant_min, bitNum, true, false);
       if (status != RET_OK) {
         MS_LOG(ERROR) << "QuantFilter failed : " << status;
         return status;
       }
-      param_value->set_tensor_type(kNumberTypeUInt8);
+      param_value->set_tensor_type(kNumberTypeInt8);
       // set dtype
       auto abstractBase = param_node->abstract();
       if (abstractBase == nullptr) {
@@ -145,7 +147,7 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {
         return RET_ERROR;
       }
       auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
-      abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8));
+      abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8));
       primitive_c->SetQuantType(schema::QuantType_WeightQuant);
     }
@@ -41,7 +41,8 @@ class WeightQuantizer : public Quantizer {
   STATUS DoQuantize(FuncGraphPtr funcGraph) override;
   STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
   STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);
+  int quant_max{INT8_MAX};
+  int quant_min{INT8_MIN};
 private:
   std::unique_ptr<QuantStrategy> mStrategy;
   size_t bitNum;
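quant_max and quant_min default to the signed 8-bit limits. For a general bit width the symmetric signed range would follow from bitNum as sketched below (per the AnfTransform guard earlier in this change, the converter currently rejects any bitNum other than 8):

#include <cstdio>

int main() {
  const unsigned bit_num = 8;                      // only 8 is accepted today
  const int quant_max = (1 << (bit_num - 1)) - 1;  // 127 for 8 bits
  const int quant_min = -(1 << (bit_num - 1));     // -128 for 8 bits
  printf("range: [%d, %d]\n", quant_min, quant_max);
  return 0;
}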