From: @xutianchun Reviewed-by: Signed-off-by:tags/v1.2.0-rc1
| @@ -8,20 +8,21 @@ file(GLOB KERNEL_SRC | |||
| ) | |||
| list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc) | |||
| if (SUPPORT_TRAIN) | |||
| file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc) | |||
| if(SUPPORT_TRAIN) | |||
| file(GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc) | |||
| set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC}) | |||
| endif() | |||
| add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC}) | |||
| add_dependencies(cpu_kernel_mid fbs_src) | |||
| if (PLATFORM_ARM64) | |||
| if (ENABLE_FP16) | |||
| if(PLATFORM_ARM64) | |||
| if(ENABLE_FP16) | |||
| file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc) | |||
| add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC}) | |||
| endif () | |||
| add_dependencies(cpu_fp16_kernel_mid fbs_src) | |||
| endif() | |||
| file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc) | |||
| add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC}) | |||
| endif () | |||
| add_dependencies(cpu_kernel_mid fbs_src) | |||
| endif() | |||
| @@ -1,3 +1,3 @@ | |||
| retinaface_732_1280_iod.mindir | |||
| mobilefacenet_iod.mindir | |||
| effnet_iod.mindir | |||
| #effnet_iod.mindir | |||
| @@ -540,9 +540,9 @@ function Run_x86() { | |||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}" | |||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_log_file}" | |||
| if [ $? = 0 ]; then | |||
| run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| run_result='x86: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| else | |||
| run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| run_result='x86: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| fi | |||
| done < ${models_mindspore_weightquant_config} | |||
| @@ -806,9 +806,9 @@ function Run_x86_sse() { | |||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}" | |||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_sse_log_file}" | |||
| if [ $? = 0 ]; then | |||
| run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| else | |||
| run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| run_result='x86_sse: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| fi | |||
| done < ${models_mindspore_weightquant_config} | |||
| @@ -1072,9 +1072,9 @@ function Run_x86_avx() { | |||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}" | |||
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_avx_log_file}" | |||
| if [ $? = 0 ]; then | |||
| run_result='x86_avx: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| else | |||
| run_result='x86_avx: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| run_result='x86_avx: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| fi | |||
| done < ${models_mindspore_weightquant_config} | |||
| @@ -1624,9 +1624,9 @@ function Run_arm64() { | |||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.weightquant.ms.out --loopCount=1' >> adb_run_cmd.txt | |||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | |||
| if [ $? = 0 ]; then | |||
| run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file} | |||
| else | |||
| run_result='arm64: '${model_name}'_train failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| run_result='arm64: '${model_name}'[weightQuant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 | |||
| fi | |||
| done < ${models_mindspore_weightquant_config} | |||
| @@ -141,11 +141,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { | |||
| // init old node indices | |||
| auto old_nodes = GetGraphNodes(); | |||
| Optimizer formatTransOptimizer; | |||
| auto formatTransPass = new (std::nothrow) FormatTransPass(); | |||
| if (formatTransPass == nullptr) { | |||
| MS_LOG(ERROR) << "new formatTransPass failed"; | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| formatTransOptimizer.AddPass(new (std::nothrow) FormatTransFusionPass()); | |||
| formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass()); | |||
| formatTransOptimizer.AddPass(new (std::nothrow) TransOpRemovePass()); | |||
| @@ -164,11 +159,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { | |||
| // init old node indices | |||
| auto old_nodes = GetGraphNodes(); | |||
| Optimizer formatTransOptimizer; | |||
| auto formatTransPass = new (std::nothrow) FormatTransPass(); | |||
| if (formatTransPass == nullptr) { | |||
| MS_LOG(ERROR) << "new formatTransPass failed"; | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| if (!ctx.trainModel && ctx.fmk != converter::FmkType_ONNX) { | |||
| formatTransOptimizer.AddPass(new (std::nothrow) GlobalFormatTransformPass()); | |||
| formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass()); | |||
| @@ -418,6 +418,13 @@ PostTrainingQuantizer::PostTrainingQuantizer(FuncGraphPtr graph, string path, in | |||
| } | |||
| } | |||
| // Tears down the fp32 and int8 inference state held by the quantizer. | |||
| // For each pair the session is destroyed first and the model second. | |||
| PostTrainingQuantizer::~PostTrainingQuantizer() { | |||
|   auto release_pair = [](session::LiteSession *session, Model *model) { | |||
|     delete session; | |||
|     delete model; | |||
|   }; | |||
|   release_pair(fp32_session_, fp32_model_); | |||
|   release_pair(int8_session_, int8_model_); | |||
| } | |||
| STATUS PostTrainingQuantizer::DoQuantInput(double scale, int32_t zeropoint, struct MaxMin *max_min, | |||
| const std::shared_ptr<PrimitiveC> &lite_primitive) const { | |||
| MS_ASSERT(max_min != nullptr); | |||
| @@ -1435,8 +1442,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| // anf -- fb | |||
| flags.quantType = schema::QuantType_QUANT_NONE; | |||
| MS_LOG(INFO) << "start create session"; | |||
| fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum()); | |||
| if (fp32_session_ == nullptr) { | |||
| auto sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum()); | |||
| fp32_session_ = sm.session; | |||
| fp32_model_ = sm.model; | |||
| if (fp32_session_ == nullptr || fp32_model_ == nullptr) { | |||
| MS_LOG(ERROR) << "create session failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -1481,8 +1490,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| // init int8 session | |||
| MS_LOG(INFO) << "create quant session"; | |||
| flags.quantType = schema::QuantType_PostTraining; | |||
| int8_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum()); | |||
| if (int8_session_ == nullptr) { | |||
| auto int8_sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum()); | |||
| int8_session_ = int8_sm.session; | |||
| int8_model_ = int8_sm.model; | |||
| if (int8_session_ == nullptr || int8_model_ == nullptr) { | |||
| MS_LOG(ERROR) << "create session failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -46,7 +46,7 @@ class PostTrainingQuantizer : public Quantizer { | |||
| public: | |||
| PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8, | |||
| bool per_channel = true); | |||
| ~PostTrainingQuantizer() = default; | |||
| ~PostTrainingQuantizer(); | |||
| STATUS DoQuantize(FuncGraphPtr func_graph) override; | |||
| @@ -64,7 +64,9 @@ class PostTrainingQuantizer : public Quantizer { | |||
| std::unique_ptr<Calibrator> calibrator_; | |||
| session::LiteSession *fp32_session_{nullptr}; | |||
| Model *fp32_model_{nullptr}; | |||
| session::LiteSession *int8_session_{nullptr}; | |||
| Model *int8_model_{nullptr}; | |||
| std::map<std::string, std::vector<float>> fp32_op_input_map; // concurency | |||
| std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurency | |||
| @@ -134,14 +134,14 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const { | |||
| } | |||
| if (node->size() < 3) { | |||
| MS_LOG(INFO) << "input size less!"; | |||
| MS_LOG(INFO) << node->fullname_with_scope() << " input size less!"; | |||
| return false; | |||
| } | |||
| auto inputNode1 = node->input(1); | |||
| auto inputNode2 = node->input(2); | |||
| if (inputNode1 == nullptr || inputNode2 == nullptr) { | |||
| MS_LOG(INFO) << "mul input is nullptr!"; | |||
| MS_LOG(INFO) << node->fullname_with_scope() << " mul input is nullptr!"; | |||
| return false; | |||
| } | |||
| @@ -153,7 +153,7 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const { | |||
| } | |||
| if (paramNode == nullptr) { | |||
| MS_LOG(INFO) << "invalid paramNode!"; | |||
| MS_LOG(INFO) << node->fullname_with_scope() << " invalid paramNode!"; | |||
| return false; | |||
| } | |||
| @@ -480,6 +480,48 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode) { | |||
| return (schema::PrimitiveType)primitive_c->Type(); | |||
| } | |||
| // Parses a comma-separated list of integers (for example "1, 224, 224, 3") into a vector. | |||
| // Each token is passed through Trim() (assumed to strip surrounding whitespace) and must be an | |||
| // optionally signed decimal number of at most nine digits, so the std::stoi call below can never | |||
| // throw. The previous std::stoul call threw on empty or non-numeric tokens such as "1,,3". | |||
| // A single trailing comma is tolerated. On a malformed token an error is logged and an empty | |||
| // vector is returned; an empty result is also logged, as before. | |||
| std::vector<int> DataToVector(const string &str) { | |||
|   // True when token is [+-]digits with 1..9 digits, i.e. always representable as int. | |||
|   auto is_decimal_int = [](const std::string &token) { | |||
|     size_t first_digit = (!token.empty() && (token[0] == '+' || token[0] == '-')) ? 1 : 0; | |||
|     size_t digit_cnt = token.size() - first_digit; | |||
|     if (digit_cnt == 0 || digit_cnt > 9) { | |||
|       return false; | |||
|     } | |||
|     for (size_t i = first_digit; i < token.size(); i++) { | |||
|       if (token[i] < '0' || token[i] > '9') { | |||
|         return false; | |||
|       } | |||
|     } | |||
|     return true; | |||
|   }; | |||
|   std::vector<int> result; | |||
|   auto raw_datas = str; | |||
|   Trim(&raw_datas); | |||
|   while (!raw_datas.empty()) { | |||
|     auto ind = raw_datas.find(','); | |||
|     // substr(0, npos) yields the whole remainder, which is the last token. | |||
|     auto data = raw_datas.substr(0, ind); | |||
|     Trim(&data); | |||
|     if (!is_decimal_int(data)) { | |||
|       MS_LOG(ERROR) << "invalid integer '" << data << "' in: " << str; | |||
|       return {}; | |||
|     } | |||
|     result.push_back(std::stoi(data)); | |||
|     if (ind == std::string::npos) { | |||
|       break; | |||
|     } | |||
|     raw_datas = raw_datas.substr(ind + 1); | |||
|     Trim(&raw_datas); | |||
|   } | |||
|   if (result.empty()) { | |||
|     MS_LOG(ERROR) << "result is empty"; | |||
|   } | |||
|   return result; | |||
| } | |||
| // Splits str on ';' and converts every piece with DataToVector, so "1,2;3,4" yields two | |||
| // integer lists. Pieces are trimmed with Trim() before conversion; an empty trailing piece | |||
| // (text ending in ';') is ignored. Logs an error when nothing was parsed. | |||
| std::vector<std::vector<int>> DataToVectors(const string &str) { | |||
|   std::vector<std::vector<int>> groups; | |||
|   std::string remaining = str; | |||
|   for (auto sep = remaining.find(';'); sep != std::string::npos; sep = remaining.find(';')) { | |||
|     std::string piece = remaining.substr(0, sep); | |||
|     Trim(&piece); | |||
|     groups.push_back(DataToVector(piece)); | |||
|     // Drop the consumed piece together with its separator. | |||
|     remaining.erase(0, sep + 1); | |||
|     Trim(&remaining); | |||
|   } | |||
|   if (!remaining.empty()) { | |||
|     groups.push_back(DataToVector(remaining)); | |||
|   } | |||
|   if (groups.empty()) { | |||
|     MS_LOG(ERROR) << "result is empty"; | |||
|   } | |||
|   return groups; | |||
| } | |||
| STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config) { | |||
| if (post_quant_config == nullptr) { | |||
| MS_LOG(ERROR) << "post_quant_config is null."; | |||
| @@ -559,6 +601,20 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf | |||
| } | |||
| } else if (key == "mean_error_threshold") { | |||
| post_quant_config->mean_error_threshold = std::stof(value); | |||
| } else if (key == "input_shapes") { | |||
| auto &raw_shape = value; | |||
| auto ind = raw_shape.find('/'); | |||
| while (ind != std::string::npos) { | |||
| auto shape = raw_shape.substr(0, ind); | |||
| Trim(&shape); | |||
| post_quant_config->input_shapes.push_back(DataToVectors(shape)); | |||
| raw_shape = raw_shape.substr(ind + 1); | |||
| Trim(&raw_shape); | |||
| ind = raw_shape.find('/'); | |||
| } | |||
| if (!raw_shape.empty()) { | |||
| post_quant_config->input_shapes.push_back(DataToVectors(raw_shape)); | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "unsupported parameter: " << key; | |||
| } | |||
| @@ -578,12 +634,12 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf | |||
| return RET_OK; | |||
| } | |||
| session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, | |||
| int thread_num) { | |||
| SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num) { | |||
| SessionModel sm; | |||
| auto meta_graph = Export(func_graph, true, true); | |||
| if (meta_graph == nullptr) { | |||
| MS_LOG(ERROR) << "Export to meta_graph failed"; | |||
| return nullptr; | |||
| return sm; | |||
| } | |||
| // transform | |||
| @@ -592,7 +648,7 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c | |||
| auto status = fb_transform.Transform(flags); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "FBTransform model failed"; | |||
| return nullptr; | |||
| return sm; | |||
| } | |||
| meta_graph->version = Version(); | |||
| @@ -604,12 +660,12 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c | |||
| auto *content = reinterpret_cast<const char *>(builder.GetBufferPointer()); | |||
| if (content == nullptr) { | |||
| MS_LOG(ERROR) << "GetBufferPointer return null"; | |||
| return nullptr; | |||
| return sm; | |||
| } | |||
| auto model = lite::Model::Import(content, size); | |||
| if (model == nullptr) { | |||
| MS_LOG(ERROR) << "Import model failed"; | |||
| return nullptr; | |||
| return sm; | |||
| } | |||
| Context ctx; | |||
| @@ -618,16 +674,19 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c | |||
| auto session = session::LiteSession::CreateSession(&ctx); | |||
| if (session == nullptr) { | |||
| MS_LOG(ERROR) << "create session failed."; | |||
| return nullptr; | |||
| return sm; | |||
| } | |||
| status = session->CompileGraph(model); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "CompileGraph error"; | |||
| return nullptr; | |||
| return sm; | |||
| } | |||
| model->Free(); | |||
| return session; | |||
| delete meta_graph; | |||
| sm.session = session; | |||
| sm.model = model; | |||
| return sm; | |||
| } | |||
| STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited, | |||
| @@ -805,4 +864,21 @@ void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamVal | |||
| return; | |||
| } | |||
| } | |||
| // Replaces the data buffer of weight with a newly allocated copy of quant_datas. | |||
| //   weight      - parameter value whose buffer address and recorded size are replaced. | |||
| //   quant_datas - source bytes; new_size bytes are read from it. | |||
| //   new_size    - number of bytes to copy; must be positive. | |||
| // Returns RET_OK on success, RET_ERROR on invalid arguments or allocation failure. On failure | |||
| // weight is left untouched. | |||
| // The previous buffer is released with delete[], so any pointer obtained from | |||
| // weight->tensor_addr() before the call must not be used afterwards. | |||
| STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size) { | |||
|   MS_ASSERT(weight != nullptr); | |||
|   MS_ASSERT(new_size > 0); | |||
|   // Runtime guard in addition to the asserts above, so bad arguments are rejected explicitly. | |||
|   if (weight == nullptr || quant_datas == nullptr || new_size <= 0) { | |||
|     MS_LOG(ERROR) << "invalid input for UpdateTensorDataAndSize"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   // Allocate and fill the new buffer before releasing the old one: a failed allocation then | |||
|   // leaves weight valid, and quant_datas may even alias the old buffer. | |||
|   char *new_tensor_data = new (std::nothrow) char[new_size]; | |||
|   if (new_tensor_data == nullptr) { | |||
|     MS_LOG(ERROR) << "new data error"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   memcpy(new_tensor_data, quant_datas, new_size); | |||
|   delete[] reinterpret_cast<char *>(weight->tensor_addr()); | |||
|   weight->set_tensor_size(new_size); | |||
|   weight->set_tensor_addr(new_tensor_data); | |||
|   return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite::quant | |||
| @@ -57,9 +57,15 @@ struct PostQuantConfig { | |||
| bool bias_correction{false}; | |||
| bool mixed{false}; | |||
| float mean_error_threshold{0.04}; | |||
| std::vector<std::vector<std::vector<int>>> input_shapes; // different input | |||
| bool inited{false}; | |||
| }; | |||
| // Pairs a lite session with a model, as returned by CreateSessionByFuncGraph. | |||
| // Both members start out as nullptr. The struct has no destructor, so it does not free either | |||
| // pointer itself. | |||
| struct SessionModel { | |||
|   session::LiteSession *session = nullptr; | |||
|   Model *model = nullptr; | |||
| }; | |||
| /** | |||
| * 1. when op's weight size > mWeightSize just skip | |||
| * 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization | |||
| @@ -97,6 +103,8 @@ std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector< | |||
| std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam); | |||
| STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size); | |||
| template <typename T> | |||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| @@ -148,27 +156,17 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan | |||
| return static_cast<T>(quant_data); | |||
| }(); | |||
| } | |||
| template <typename T> | |||
| STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, QuantType quantType, | |||
| int quant_max, int quant_min, size_t bitNum, bool per_channel, bool k_means = false) { | |||
| int quant_max, int quant_min, size_t bitNum, bool per_channel, int index = 1, bool k_means = false) { | |||
| MS_ASSERT(weight != nullptr); | |||
| MS_ASSERT(primitive_c != nullptr); | |||
| auto dims = weight->tensor_shape(); | |||
| auto op_type = (schema::PrimitiveType)primitive_c->Type(); | |||
| if (per_channel) { | |||
| if (dims.size() != 4 && dims.size() != 2 && op_type != schema::PrimitiveType_MatMul) { | |||
| MS_LOG(INFO) << "weight dims size: " << dims.size() << " switch to per-layer quant mode."; | |||
| if (dims.size() <= 1) { | |||
| MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel"; | |||
| per_channel = false; | |||
| } else { | |||
| if (dims.size() == 2 && op_type != schema::PrimitiveType_FullConnection) { | |||
| MS_LOG(INFO) << "weight dims size is 2 but op_type is not FullConnection, switch to per-layer quant mode."; | |||
| per_channel = false; | |||
| } | |||
| uint32_t channels = dims[0]; | |||
| if (channels == 0) { | |||
| MS_LOG(ERROR) << "channels is 0"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| } | |||
| @@ -261,12 +259,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit | |||
| } | |||
| quant_params.emplace_back(quant_param); | |||
| } | |||
| auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T)); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "memcpy error: " << ret; | |||
| auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T)); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| weight->set_tensor_size(elem_count * sizeof(T)); | |||
| } else { | |||
| // per layer | |||
| float min = FLT_MAX; | |||
| @@ -294,12 +291,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit | |||
| quant_datas[i] = quant_data; | |||
| } | |||
| } | |||
| auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T)); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "memcpy error: " << ret; | |||
| auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T)); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| weight->set_tensor_size(elem_count * sizeof(T)); | |||
| } | |||
| // do bit pack | |||
| @@ -311,21 +307,19 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit | |||
| if (bitNum > 0 && bitNum < 8) { | |||
| std::vector<uint8_t> pack_data{}; | |||
| BitPack::BitPacking<T, uint8_t>(bitNum, data, &pack_data); | |||
| auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint8_t)); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed"; | |||
| auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t)); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| weight->set_tensor_size(pack_data.size() * sizeof(uint8_t)); | |||
| } else if (bitNum > 8 && bitNum < 16) { | |||
| std::vector<uint16_t> pack_data{}; | |||
| BitPack::BitPacking<T, uint16_t>(bitNum, data, &pack_data); | |||
| auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint16_t)); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed"; | |||
| auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t)); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| weight->set_tensor_size(pack_data.size() * sizeof(uint16_t)); | |||
| } | |||
| } | |||
| @@ -336,7 +330,7 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit | |||
| if (quantType == QuantType_PostTraining) { | |||
| primitive_c->AddInputQuantParam(quant_params); | |||
| } else { | |||
| primitive_c->set_input_quant_param(WEIGHT_INDEX, quant_params); | |||
| primitive_c->set_input_quant_param(index, quant_params); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -347,8 +341,7 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode); | |||
| STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config); | |||
| session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, | |||
| int thread_num); | |||
| SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num); | |||
| STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited, | |||
| std::vector<std::vector<std::string>> *inputs); | |||
| @@ -359,6 +352,5 @@ STATUS CopyInputDataToTensor(size_t input_index, size_t image_index, | |||
| FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &); | |||
| void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamValueLitePtr *param_value); | |||
| } // namespace mindspore::lite::quant | |||
| #endif | |||
| @@ -84,7 +84,13 @@ WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const std::string &config_f | |||
| } | |||
| } | |||
| WeightQuantizer::~WeightQuantizer() { delete fp32_session_; } | |||
| // Frees every tensor pointer stored in fp32_output_tensors_ (one name->tensor map per entry). | |||
| WeightQuantizer::~WeightQuantizer() { | |||
|   for (auto &outputs_of_image : fp32_output_tensors_) { | |||
|     for (auto it = outputs_of_image.begin(); it != outputs_of_image.end(); ++it) { | |||
|       delete it->second; | |||
|     } | |||
|   } | |||
| } | |||
| STATUS WeightQuantizer::SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, | |||
| std::shared_ptr<PrimitiveC> primitive_c) { | |||
| @@ -278,11 +284,11 @@ STATUS WeightQuantizer::DoLstmQuntize(CNodePtr cnode) { | |||
| } | |||
| auto status = RET_ERROR; | |||
| if (type_id_ == kNumberTypeInt8) { | |||
| status = | |||
| QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false); | |||
| status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, | |||
| false, 2); | |||
| } else if (type_id_ == kNumberTypeInt16) { | |||
| status = | |||
| QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false); | |||
| status = QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, | |||
| false, 2); | |||
| } | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "QuantFilter failed : " << status; | |||
| @@ -438,15 +444,73 @@ float CompareOutputData(const std::unordered_map<std::string, mindspore::tensor: | |||
| return total_mean_error / tensor_cnt; | |||
| } | |||
| STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) { | |||
| // Runs the unquantized (fp32) graph once per calibration image and stores a copy of every | |||
| // output tensor in fp32_output_tensors_[image index], keyed by output name. | |||
| // When config_param_.input_shapes is set it must hold one entry per image; the session is | |||
| // resized with that entry before the image is fed. | |||
| // Returns RET_OK on success and RET_ERROR when there are no images, the shape count does not | |||
| // match, or session creation, resize, input copy, inference or tensor copy fails. | |||
| // The temporary session and model are held by unique_ptr, so they are released on every return | |||
| // path (the create-failure path previously leaked the session). | |||
| STATUS WeightQuantizer::RunFp32Graph(FuncGraphPtr func_graph) { | |||
|   if (images_.empty()) { | |||
|     // images_.at(0) below would throw on an empty container. | |||
|     MS_LOG(ERROR) << "images is empty"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   auto image_cnt = images_.at(0).size(); | |||
|   if (!config_param_.input_shapes.empty() && config_param_.input_shapes.size() != image_cnt) { | |||
|     MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt; | |||
|     return RET_ERROR; | |||
|   } | |||
|   // 0.1 Create Fp32 Session | |||
|   flags.quantType = schema::QuantType_QUANT_NONE; | |||
|   auto fp32_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num); | |||
|   // The model is declared first so that the session is destroyed before it, as before. | |||
|   std::unique_ptr<Model> fp32_model(fp32_sm.model); | |||
|   std::unique_ptr<session::LiteSession> fp32_session(fp32_sm.session); | |||
|   if (fp32_session == nullptr || fp32_model == nullptr) { | |||
|     MS_LOG(ERROR) << "CreateSessoin fail"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   auto fp32_inputs = fp32_session->GetInputs(); | |||
|   fp32_output_tensors_.resize(image_cnt); | |||
|   // 0.3 save fp32 output | |||
|   for (size_t i = 0; i < image_cnt; i++) { | |||
|     if (!config_param_.input_shapes.empty()) { | |||
|       auto status = fp32_session->Resize(fp32_inputs, {config_param_.input_shapes[i]}); | |||
|       if (status != RET_OK) { | |||
|         MS_LOG(ERROR) << "session Resize fail"; | |||
|         return RET_ERROR; | |||
|       } | |||
|     } | |||
|     for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) { | |||
|       auto status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]); | |||
|       if (status != RET_OK) { | |||
|         MS_LOG(ERROR) << "generate input data from images failed!"; | |||
|         return RET_ERROR; | |||
|       } | |||
|     } | |||
|     auto status = fp32_session->RunGraph(); | |||
|     if (status != RET_OK) { | |||
|       MS_LOG(ERROR) << "RunGraph fail"; | |||
|       return RET_ERROR; | |||
|     } | |||
|     auto fp32_outputs = fp32_session->GetOutputs(); | |||
|     for (const auto &kv : fp32_outputs) { | |||
|       auto *lite_tensor = reinterpret_cast<lite::Tensor *>(kv.second); | |||
|       if (lite_tensor == nullptr) { | |||
|         MS_LOG(ERROR) << "not lite tensor"; | |||
|         return RET_ERROR; | |||
|       } | |||
|       // The copy is stored rather than the session's tensor, since the session is released | |||
|       // when this function returns. | |||
|       auto *new_tensor = Tensor::CopyTensor(*lite_tensor, true); | |||
|       if (new_tensor == nullptr) { | |||
|         MS_LOG(ERROR) << "copy output tensor fail"; | |||
|         return RET_ERROR; | |||
|       } | |||
|       fp32_output_tensors_[i][kv.first] = new_tensor; | |||
|     } | |||
|   } | |||
|   return RET_OK; | |||
| } | |||
| STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) { | |||
| // 0.2 Parse input calib files | |||
| auto status = CollectCalibInputs(config_param_.image_paths, config_param_.batch_count, &images_); | |||
| if (status != RET_OK) { | |||
| @@ -454,6 +518,12 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) { | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(DEBUG) << "run fp32 model"; | |||
| status = RunFp32Graph(func_graph); | |||
| if (status != RET_OK) { | |||
| return RET_ERROR; | |||
| } | |||
| auto cnodes = func_graph->GetOrderedCnodes(); | |||
| for (auto &cnode : cnodes) { | |||
| auto op_type = NodePrimitiveType(cnode); | |||
| @@ -471,6 +541,13 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) { | |||
| } | |||
| } | |||
| } | |||
| auto image_cnt = images_.at(0).size(); | |||
| if (!config_param_.input_shapes.empty()) { | |||
| if (config_param_.input_shapes.size() != image_cnt) { | |||
| MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| for (auto iter = cnodes.end(); iter != cnodes.begin();) { | |||
| auto cnode = *(--iter); | |||
| @@ -540,66 +617,58 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) { | |||
| // 2. evaluate the quant | |||
| // 2.1 create quant session, get input, output tensor | |||
| flags.quantType = schema::QuantType_WeightQuant; | |||
| auto quant_session = | |||
| std::unique_ptr<session::LiteSession>(CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num)); | |||
| auto quant_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num); | |||
| auto quant_session = std::unique_ptr<session::LiteSession>(quant_sm.session); | |||
| if (quant_session == nullptr) { | |||
| MS_LOG(ERROR) << "create session error: " << status; | |||
| delete quant_sm.model; | |||
| return RET_ERROR; | |||
| } | |||
| auto quant_inputs = quant_session->GetInputs(); | |||
| auto mean_error = 0.0f; | |||
| if (fp32_inputs.size() != images_.size()) { | |||
| MS_LOG(ERROR) << "model's input tensor cnt: " << fp32_inputs.size() << " != " << images_.size(); | |||
| return RET_ERROR; | |||
| } | |||
| auto image_cnt = images_.at(0).size(); | |||
| for (size_t i = 0; i < image_cnt; i++) { | |||
| // set multi-input data | |||
| for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) { | |||
| status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]); | |||
| if (!config_param_.input_shapes.empty()) { | |||
| status = quant_session->Resize(quant_inputs, {config_param_.input_shapes[i]}); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "generate input data from images failed!"; | |||
| MS_LOG(ERROR) << "session Resize fail"; | |||
| delete quant_sm.model; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| // set multi-input data | |||
| for (size_t input_index = 0; input_index < quant_inputs.size(); input_index++) { | |||
| status = CopyInputDataToTensor(input_index, i, images_, quant_inputs[input_index]); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "generate input data from images failed!"; | |||
| delete quant_sm.model; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| std::future<STATUS> fp32_inference = std::async( | |||
| std::launch::async, [](session::LiteSession *fp32_session) -> STATUS { return fp32_session->RunGraph(); }, | |||
| fp32_session_); | |||
| status = quant_session->RunGraph(); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "quant session run error"; | |||
| return RET_ERROR; | |||
| } | |||
| status = fp32_inference.get(); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "fp32 session run error"; | |||
| delete quant_sm.model; | |||
| return RET_ERROR; | |||
| } | |||
| // 3. compare between quant and fp32 | |||
| auto fp32_outputs = fp32_session_->GetOutputs(); | |||
| auto quant_outputs = quant_session->GetOutputs(); | |||
| mean_error += CompareOutputData<float>(fp32_outputs, quant_outputs); | |||
| mean_error += CompareOutputData<float>(fp32_output_tensors_[i], quant_outputs); | |||
| } // end_for: calib data loop | |||
| delete quant_sm.model; | |||
| mean_error = mean_error / image_cnt; | |||
| if (mean_error <= config_param_.mean_error_threshold) { | |||
| MS_LOG(DEBUG) << "op: " << op_name << " got mixed bit: " << bit_num_t << " mean_error: " << mean_error; | |||
| opname_bit_[op_name] = bit_num_t; | |||
| break; | |||
| } else if (bit_num_t != 8) { | |||
| MS_LOG(DEBUG) << "op: " << op_name << " intermediate bit: " << bit_num_t << " mean_error: " << mean_error | |||
| << " [recover]"; | |||
| // recover | |||
| param_value->set_tensor_size(sizeof(float) * elem_count); | |||
| ret = memcpy_s(raw_data, param_value->tensor_size(), origin_data, sizeof(float) * elem_count); | |||
| if (ret != EOK) { | |||
| MS_LOG(ERROR) << "memcpy fail: " | |||
| << " src size: " << sizeof(float) * elem_count << " dst size: " << param_value->tensor_size(); | |||
| status = UpdateTensorDataAndSize(param_value, origin_data, sizeof(float) * elem_count); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize fail"; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| @@ -610,6 +679,9 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) { | |||
| free(origin_data); | |||
| } // if: conv and matmul | |||
| } // end loop: all cnode | |||
| for (const auto &kv : opname_bit_) { | |||
| MS_LOG(INFO) << "op: " << kv.first << " bit:" << kv.second; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <future> | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <map> | |||
| #include <list> | |||
| #include <string> | |||
| @@ -59,11 +60,12 @@ class WeightQuantizer : public Quantizer { | |||
| std::string config_file_; | |||
| PostQuantConfig config_param_; | |||
| std::vector<std::vector<std::string>> images_; // multi_input, [[mode_input_0], [model_input_1]...] | |||
| session::LiteSession *fp32_session_ = nullptr; | |||
| std::vector<std::unordered_map<std::string, mindspore::tensor::MSTensor *>> fp32_output_tensors_; | |||
| STATUS DoMiexedQuant(FuncGraphPtr); | |||
| STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c); | |||
| STATUS DoFixedQuant(FuncGraphPtr); | |||
| STATUS RunFp32Graph(FuncGraphPtr); | |||
| }; | |||
| } // namespace mindspore::lite::quant | |||
| #endif | |||