Merge pull request !3130 from vlne-v1/quant_op_depthwisetags/v0.6.0-beta
| @@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) { | |||
| MS_LOG(INFO) << "End save compiled func graph!"; | |||
| } | |||
| bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const { | |||
| std::string phase_prefix = GetPhasePrefix(phase_s); | |||
| if (use_vm && phase_prefix == "export") { | |||
| MS_LOG(INFO) << "Use ge backend to export geir"; | |||
| use_vm = false; | |||
| } | |||
| return use_vm; | |||
| } | |||
| void ExecutorPy::GetGeBackendPolicy() const { | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| @@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const { | |||
| } | |||
| } | |||
| bool IsPhaseExportGeir(const std::string &phase_s) { | |||
| auto phase_to_export = "export.geir"; | |||
| return phase_s.rfind(phase_to_export, 0) != std::string::npos; | |||
| } | |||
| std::vector<ActionItem> GetPipline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) { | |||
| bool is_geir = IsPhaseExportGeir(phase_s); | |||
| std::string backend = MsContext::GetInstance()->backend_policy(); | |||
| #if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES) | |||
| if (mindspore::parallel::ps::Util::IsParamServerMode()) { | |||
| mindspore::parallel::ps::Util::SetInternalEnvVar(); | |||
| } | |||
| if (parallel::ps::Util::IsRoleOfPServer()) { | |||
| resource->results()[kBackend] = compile::CreateBackend(); | |||
| return PServerPipeline(); | |||
| } | |||
| if (parallel::ps::Util::IsRoleOfScheduler()) { | |||
| return PSchedulerPipeline(); | |||
| } | |||
| #endif | |||
| if (use_vm && backend != "ge" && !is_geir) { | |||
| // Create backend and session | |||
| auto backend_ptr = compile::CreateBackend(); | |||
| // Connect session to debugger | |||
| backend_ptr->SetDebugger(); | |||
| resource->results()[kBackend] = backend_ptr; | |||
| return VmPipeline(); | |||
| } | |||
| return GePipeline(); | |||
| } | |||
| bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) { | |||
| MS_LOG(DEBUG) << "Start ExecutorPy compile!"; | |||
| if ((!py::isinstance<py::str>(phase))) { | |||
| @@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons | |||
| std::string phase_s = py::cast<std::string>(phase); | |||
| MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!"; | |||
| ResourcePtr resource = std::make_shared<Resource>(obj); | |||
| std::vector<ActionItem> p_actions; | |||
| use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s); | |||
| std::string backend = MsContext::GetInstance()->backend_policy(); | |||
| #if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES) | |||
| if (mindspore::parallel::ps::Util::IsParamServerMode()) { | |||
| mindspore::parallel::ps::Util::SetInternalEnvVar(); | |||
| } | |||
| if (parallel::ps::Util::IsRoleOfPServer()) { | |||
| resource->results()[kBackend] = compile::CreateBackend(); | |||
| p_actions = PServerPipeline(); | |||
| } else if (parallel::ps::Util::IsRoleOfScheduler()) { | |||
| p_actions = PSchedulerPipeline(); | |||
| } else if (use_vm && backend != "ge") { | |||
| // Create backend and session | |||
| auto backend_ptr = compile::CreateBackend(); | |||
| // Connect session to debugger | |||
| backend_ptr->SetDebugger(); | |||
| resource->results()[kBackend] = backend_ptr; | |||
| p_actions = VmPipeline(); | |||
| } else { | |||
| p_actions = GePipeline(); | |||
| } | |||
| #else | |||
| if (use_vm && backend != "ge") { | |||
| // Create backend and session | |||
| auto backend_ptr = compile::CreateBackend(); | |||
| // Connect session to debugger | |||
| backend_ptr->SetDebugger(); | |||
| resource->results()[kBackend] = backend_ptr; | |||
| p_actions = VmPipeline(); | |||
| } else { | |||
| p_actions = GePipeline(); | |||
| } | |||
| #endif | |||
| auto p_actions = GetPipline(resource, phase_s, use_vm); | |||
| std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s)); | |||
| // get the parameters items and add the value to args_spec | |||
| @@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons | |||
| } | |||
| std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) { | |||
| // phase does not contain 'export_onnx' | |||
| if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) { | |||
| // filter action after validate when 'export'. | |||
| if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) { | |||
| return actions; | |||
| } | |||
| MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'"; | |||
| @@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> { | |||
| private: | |||
| ExecutorPy(); | |||
| void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors); | |||
| bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const; | |||
| void GetGeBackendPolicy() const; | |||
| // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after | |||
| // 'validate' stage | |||
| @@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss"; | |||
| const char kNameCTCLoss[] = "CTCLoss"; | |||
| const char kNameRange[] = "Range"; | |||
| const char kNameSquareSumAll[] = "SquareSumAll"; | |||
| const char kNameAscendQuant[] = "AscendQuant"; | |||
| const char kNameAscendDequant[] = "AscendDequant"; | |||
| const char kNameAscendQuant[] = "Quant"; | |||
| const char kNameAscendDequant[] = "Dequant"; | |||
| const char kNameCase[] = "Case"; | |||
| // -----------------OpAdapter initialization-------------- | |||
| @@ -1107,7 +1107,7 @@ class QuantBlock(Cell): | |||
| r""" | |||
| A quant block of Conv/Dense, activation layer for Ascend deploy. | |||
| Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant. | |||
| Calculate Conv or Dense in Int8, with Quant and DeQuant. | |||
| Notes: | |||
| This block is only for deploy, and not trainable. | |||
| @@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer): | |||
| return x_dtype | |||
| class AscendQuant(PrimitiveWithInfer): | |||
| class Quant(PrimitiveWithInfer): | |||
| r""" | |||
| Returns the quantized value of input_x. | |||
| @@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> input_x = Tensor([100.0, 150.0], mstype.float32) | |||
| >>> quant = P.AscendQuant(80.0, 0.0, False, "Round") | |||
| >>> quant = P.Quant(80.0, 0.0, False, "Round") | |||
| >>> y = quant(input_x) | |||
| """ | |||
| @@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer): | |||
| return mstype.int8 | |||
| class AscendDequant(PrimitiveWithInfer): | |||
| class Dequant(PrimitiveWithInfer): | |||
| r""" | |||
| Returns the dequantized value of input_x. | |||
| This operation will do ReLU to the dequantized value if `relu_flag` is True. | |||
| @@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> input_x = Tensor([100.0, 150.0], mstype.float32) | |||
| >>> dequant = P.AscendDequant(False, False) | |||
| >>> dequant = P.Dequant(False, False) | |||
| >>> y = dequant(input_x) | |||
| """ | |||
| @prim_attr_register | |||
| @@ -329,14 +329,14 @@ class ExportToQuantInferNetwork: | |||
| return None | |||
| # Build the `Quant` `Dequant` op. | |||
| # AscendQuant only support perlayer version. Need check here. | |||
| quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in)) | |||
| # Quant only supports the per-layer version. Need to check here. | |||
| quant_op = inner.Quant(float(scale_a_in), float(zp_a_in)) | |||
| sqrt_mode = False | |||
| scale_deq = scale_a_out * scale_w | |||
| if (scale_deq < 2 ** -14).all(): | |||
| scale_deq = np.sqrt(scale_deq) | |||
| sqrt_mode = True | |||
| dequant_op = inner.AscendDequant(sqrt_mode) | |||
| dequant_op = inner.Dequant(sqrt_mode) | |||
| # get op | |||
| op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv | |||
| @@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format=' | |||
| file_name (str): File name of model to export. | |||
| mean (int): Input data mean. Default: 127.5. | |||
| std_dev (int, float): Input data variance. Default: 127.5. | |||
| file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model. | |||
| - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model. | |||
| file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported | |||
| quantization aware model. Default: 'GEIR'. | |||
| - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of | |||
| Ascend model. | |||
| - BINARY: Binary format for model. An intermediate representation format for models. | |||
| """ | |||
| supported_device = ["Ascend"] | |||
| supported_formats = ['GEIR'] | |||
| supported_formats = ['GEIR', 'BINARY'] | |||
| mean = validator.check_type("mean", mean, (int, float)) | |||
| std_dev = validator.check_type("std_dev", std_dev, (int, float)) | |||
| @@ -428,10 +432,9 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format=' | |||
| network.set_train(False) | |||
| if file_format == 'GEIR': | |||
| exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) | |||
| deploy_net = exporter.run() | |||
| serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) | |||
| exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) | |||
| deploy_net = exporter.run() | |||
| serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) | |||
| def convert_quant_network(network, | |||
| @@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point): | |||
| raise ValueError("`scale` and `zero_point` should have the same shape.") | |||
| if scale.shape[0] < 0: | |||
| raise ValueError("`scale` and `zero_point` shape should greater than zero.") | |||
| if len(scale.shape) > 1: | |||
| if len(scale.shape) >= 1 and scale.shape[0] > 1: | |||
| # for perchannel | |||
| if scale.shape[0] == data.shape[0]: | |||
| # `Conv2d` or `Dense` op weight | |||
| @@ -454,19 +454,20 @@ def export(net, *inputs, file_name, file_format='GEIR'): | |||
| # export model | |||
| net.init_parameters_data() | |||
| if file_format == 'GEIR': | |||
| _executor.compile(net, *inputs, phase='export') | |||
| phase_name = 'export.geir' | |||
| _executor.compile(net, *inputs, phase=phase_name) | |||
| _executor.export(net, file_name, file_format) | |||
| elif file_format == 'ONNX': # file_format is 'ONNX' | |||
| # NOTICE: the phase name `export_onnx` is used for judging whether onnx is being exported in the compile pipeline, | |||
| # do not change it to other values. | |||
| phase_name = 'export_onnx' | |||
| phase_name = 'export.onnx' | |||
| graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) | |||
| onnx_stream = _executor._get_func_graph_proto(graph_id) | |||
| with open(file_name, 'wb') as f: | |||
| os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) | |||
| f.write(onnx_stream) | |||
| elif file_format == 'BINARY': # file_format is 'BINARY' | |||
| phase_name = 'export_binary' | |||
| phase_name = 'export.binary' | |||
| graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) | |||
| onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir') | |||
| with open(file_name, 'wb') as f: | |||
| @@ -2180,36 +2180,36 @@ test_case_other_ops = [ | |||
| ] | |||
| test_case_quant_ops = [ | |||
| ('AscendQuant_1', { | |||
| 'block': inner.AscendQuant(0.5, 0.0, False, "Round"), | |||
| ('Quant_1', { | |||
| 'block': inner.Quant(0.5, 0.0, False, "Round"), | |||
| 'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_2', { | |||
| 'block': inner.AscendQuant(80.0, 10.0, True, "Round"), | |||
| ('Quant_2', { | |||
| 'block': inner.Quant(80.0, 10.0, True, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_3', { | |||
| 'block': inner.AscendQuant(80.0, 0.0, False, "Floor"), | |||
| ('Quant_3', { | |||
| 'block': inner.Quant(80.0, 0.0, False, "Floor"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_4', { | |||
| 'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"), | |||
| ('Quant_4', { | |||
| 'block': inner.Quant(80.0, 0.0, False, "Ceil"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_5', { | |||
| 'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"), | |||
| ('Quant_5', { | |||
| 'block': inner.Quant(80.0, 0.0, False, "Trunc"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_6', { | |||
| 'block': inner.AscendQuant(-80.0, 10.0, False, "Round"), | |||
| ('Quant_6', { | |||
| 'block': inner.Quant(-80.0, 10.0, False, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_7', { | |||
| 'block': inner.AscendQuant(80.0, -10.0, False, "Round"), | |||
| ('Quant_7', { | |||
| 'block': inner.Quant(80.0, -10.0, False, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_8', { | |||
| 'block': inner.AscendQuant(80.0, 10.0, False, "Round"), | |||
| ('Quant_8', { | |||
| 'block': inner.Quant(80.0, 10.0, False, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)], | |||
| 'skip': ['backward']}), | |||
| ] | |||
| @@ -75,10 +75,20 @@ def test_qat_lenet(): | |||
| @pytest.mark.skip(reason="no `te.lang.cce` in ut env") | |||
| def test_qat_mobile(): | |||
| def test_qat_mobile_per_channel_tf(): | |||
| network = mobilenetV2(num_classes=1000) | |||
| img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) | |||
| network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) | |||
| network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False]) | |||
| # should load the checkpoint. mock here | |||
| for param in network.get_parameters(): | |||
| param.init_data() | |||
| qat.export(network, img, file_name="quant.pb") | |||
| @pytest.mark.skip(reason="no `te.lang.cce` in ut env") | |||
| def test_qat_mobile_per_channel_ff(): | |||
| network = mobilenetV2(num_classes=1000) | |||
| img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) | |||
| network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False]) | |||
| # should load the checkpoint. mock here | |||
| for param in network.get_parameters(): | |||
| param.init_data() | |||