Merge pull request !3130 from vlne-v1/quant_op_depthwise
@@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
   MS_LOG(INFO) << "End save compiled func graph!";
 }

-bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const {
-  std::string phase_prefix = GetPhasePrefix(phase_s);
-  if (use_vm && phase_prefix == "export") {
-    MS_LOG(INFO) << "Use ge backend to export geir";
-    use_vm = false;
-  }
-  return use_vm;
-}
-
 void ExecutorPy::GetGeBackendPolicy() const {
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
@@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const {
   }
 }

+bool IsPhaseExportGeir(const std::string &phase_s) {
+  auto phase_to_export = "export.geir";
+  return phase_s.rfind(phase_to_export, 0) != std::string::npos;
+}
+
+std::vector<ActionItem> GetPipline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) {
+  bool is_geir = IsPhaseExportGeir(phase_s);
+  std::string backend = MsContext::GetInstance()->backend_policy();
+#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
+  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
+    mindspore::parallel::ps::Util::SetInternalEnvVar();
+  }
+  if (parallel::ps::Util::IsRoleOfPServer()) {
+    resource->results()[kBackend] = compile::CreateBackend();
+    return PServerPipeline();
+  }
+  if (parallel::ps::Util::IsRoleOfScheduler()) {
+    return PSchedulerPipeline();
+  }
+#endif
+  if (use_vm && backend != "ge" && !is_geir) {
+    // Create backend and session
+    auto backend_ptr = compile::CreateBackend();
+    // Connect session to debugger
+    backend_ptr->SetDebugger();
+    resource->results()[kBackend] = backend_ptr;
+    return VmPipeline();
+  }
+  return GePipeline();
+}
+
 bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
   MS_LOG(DEBUG) << "Start ExecutorPy compile!";
   if ((!py::isinstance<py::str>(phase))) {
@@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
   std::string phase_s = py::cast<std::string>(phase);
   MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!";
   ResourcePtr resource = std::make_shared<Resource>(obj);
-  std::vector<ActionItem> p_actions;
-  use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s);
-  std::string backend = MsContext::GetInstance()->backend_policy();
-#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
-  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
-    mindspore::parallel::ps::Util::SetInternalEnvVar();
-  }
-  if (parallel::ps::Util::IsRoleOfPServer()) {
-    resource->results()[kBackend] = compile::CreateBackend();
-    p_actions = PServerPipeline();
-  } else if (parallel::ps::Util::IsRoleOfScheduler()) {
-    p_actions = PSchedulerPipeline();
-  } else if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#else
-  if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#endif
+  auto p_actions = GetPipline(resource, phase_s, use_vm);
   std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));

   // get the parameters items and add the value to args_spec
@@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
 }

 std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
-  // phase does not contain 'export_onnx'
-  if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
+  // filter action after validate when 'export'.
+  if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) {
     return actions;
   }
   MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";
@@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  private:
   ExecutorPy();
   void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
-  bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
   // 'validate' stage
@@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss";
 const char kNameCTCLoss[] = "CTCLoss";
 const char kNameRange[] = "Range";
 const char kNameSquareSumAll[] = "SquareSumAll";
-const char kNameAscendQuant[] = "AscendQuant";
-const char kNameAscendDequant[] = "AscendDequant";
+const char kNameAscendQuant[] = "Quant";
+const char kNameAscendDequant[] = "Dequant";
 const char kNameCase[] = "Case";

 // -----------------OpAdapter initialization--------------
@@ -1107,7 +1107,7 @@ class QuantBlock(Cell):
     r"""
     A quant block of Conv/Dense, activation layer for Ascend deploy.

-    Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant.
+    Calculate Conv or Dense in Int8, with Quant and DeQuant.

     Notes:
         This block is only for deploy, and not trainable.
@@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer):
         return x_dtype


-class AscendQuant(PrimitiveWithInfer):
+class Quant(PrimitiveWithInfer):
     r"""
     Returns the quantized value of input_x.
@@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer):
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> quant = P.AscendQuant(80.0, 0.0, False, "Round")
+        >>> quant = P.Quant(80.0, 0.0, False, "Round")
         >>> y = quant(input_x)
     """
@@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer):
         return mstype.int8


-class AscendDequant(PrimitiveWithInfer):
+class Dequant(PrimitiveWithInfer):
     r"""
     Returns the dequantized value of input_x.

     This operation will do ReLU to the dequantized value if `relu_flag` is True.
@@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer):
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> dequant = P.AscendDequant(False, False)
+        >>> dequant = P.Dequant(False, False)
         >>> y = dequant(input_x)
     """

     @prim_attr_register
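Likewise, a hedged sketch of the `Dequant` behaviour described in its docstring: the integer result is scaled back to float, and a ReLU is fused in when `relu_flag` is True. The ReLU part is stated above; the scale handling (and applying the scale twice in `sqrt_mode`) is an assumption made for illustration.

```python
import numpy as np

def dequant_reference(x_int, deq_scale, sqrt_mode=False, relu_flag=False):
    """Rough host-side model of Dequant: y = x * deq_scale (applied twice in sqrt_mode),
    optionally followed by ReLU."""
    y = x_int.astype(np.float32) * deq_scale
    if sqrt_mode:
        y = y * deq_scale  # assume the stored scale is the square root of the real one
    if relu_flag:
        y = np.maximum(y, 0.0)
    return y

# Illustrative values only.
print(dequant_reference(np.array([100, -50], dtype=np.int32), deq_scale=0.02, relu_flag=True))  # [2. 0.]
```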
@@ -329,14 +329,14 @@ class ExportToQuantInferNetwork:
             return None

         # Build the `Quant` `Dequant` op.
-        # AscendQuant only support perlayer version. Need check here.
-        quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in))
+        # Quant only support perlayer version. Need check here.
+        quant_op = inner.Quant(float(scale_a_in), float(zp_a_in))
         sqrt_mode = False
         scale_deq = scale_a_out * scale_w
         if (scale_deq < 2 ** -14).all():
             scale_deq = np.sqrt(scale_deq)
             sqrt_mode = True
-        dequant_op = inner.AscendDequant(sqrt_mode)
+        dequant_op = inner.Dequant(sqrt_mode)

         # get op
         op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv
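The `sqrt_mode` switch in this hunk can be reproduced with a few lines of NumPy: when the combined dequantization scale falls below 2**-14, the exporter stores its square root and asks `Dequant` to apply it twice, which keeps the stored value in a more comfortable range. The scale values below are invented for illustration.

```python
import numpy as np

scale_a_out = np.array([1.0e-3], dtype=np.float32)  # invented output-activation scale
scale_w = np.array([1.0e-5], dtype=np.float32)      # invented weight scale
scale_deq = scale_a_out * scale_w                   # 1e-8, far below 2 ** -14

sqrt_mode = False
if (scale_deq < 2 ** -14).all():                    # same threshold as ExportToQuantInferNetwork
    scale_deq = np.sqrt(scale_deq)                  # store sqrt(scale); Dequant applies it twice
    sqrt_mode = True

print(sqrt_mode, scale_deq)                         # True, ~1e-04
```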
@@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
         file_name (str): File name of model to export.
         mean (int): Input data mean. Default: 127.5.
         std_dev (int, float): Input data variance. Default: 127.5.
-        file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model.
-
-            - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model.
+        file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' formats for the exported
+            quantization aware model. Default: 'GEIR'.
+
+            - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of
+              Ascend models.
+            - BINARY: Binary format for models. An intermediate representation format for models.
     """
     supported_device = ["Ascend"]
-    supported_formats = ['GEIR']
+    supported_formats = ['GEIR', 'BINARY']

     mean = validator.check_type("mean", mean, (int, float))
     std_dev = validator.check_type("std_dev", std_dev, (int, float))
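A hedged usage sketch of this export entry point with the newly listed 'BINARY' format. The module path, the `Conv2dBnAct`-based toy network, and the argument values are assumptions made for illustration; they loosely mirror the tests further down in this diff.

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.train.quant import quant as qat  # assumed import path for the quant-aware tools

class TinyNet(nn.Cell):
    def __init__(self):
        super(TinyNet, self).__init__()
        # A fused conv + bn + activation block, the kind of cell the quant converter rewrites.
        self.block = nn.Conv2dBnAct(3, 8, 3, has_bn=True, activation='relu')

    def construct(self, x):
        return self.block(x)

network = qat.convert_quant_network(TinyNet(), bn_fold=True,
                                    per_channel=[False, True], symmetric=[True, False])
img = Tensor(np.ones((1, 3, 32, 32)).astype(np.float32))
for param in network.get_parameters():  # stand-in for loading a trained checkpoint
    param.init_data()
qat.export(network, img, file_name="quant.pb", file_format='BINARY')
```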
@@ -428,10 +432,9 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
     network.set_train(False)

-    if file_format == 'GEIR':
-        exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
-        deploy_net = exporter.run()
-        serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
+    exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
+    deploy_net = exporter.run()
+    serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)


 def convert_quant_network(network,
@@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point):
         raise ValueError("`scale` and `zero_point` should have the same shape.")
     if scale.shape[0] < 0:
         raise ValueError("`scale` and `zero_point` shape should greater than zero.")
-    if len(scale.shape) > 1:
+    if len(scale.shape) >= 1 and scale.shape[0] > 1:
         # for perchannel
         if scale.shape[0] == data.shape[0]:
             # `Conv2d` or `Dense` op weight
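The changed condition above is what separates per-channel from per-layer parameters. Below is a simplified NumPy sketch of the axis-0 (`Conv2d`/`Dense`) branch only, assuming the usual affine mapping `round(w / scale + zero_point)`; the helper name and values are illustrative, and depthwise weights use a different axis in the real helper.

```python
import numpy as np

def weight2int_sketch(data, scale, zero_point):
    """Per-channel when scale has more than one element (broadcast along axis 0),
    per-layer otherwise."""
    if len(scale.shape) >= 1 and scale.shape[0] > 1:  # mirrors the new condition
        shape = (-1,) + (1,) * (data.ndim - 1)
        scale = scale.reshape(shape)
        zero_point = zero_point.reshape(shape)
    return np.clip(np.round(data / scale + zero_point), -128, 127).astype(np.int8)

weight = np.random.randn(4, 3, 3, 3).astype(np.float32)   # (out_channels, in_channels, kh, kw)
per_channel_scale = np.full(4, 0.05, dtype=np.float32)
zero_point = np.zeros(4, dtype=np.float32)
print(weight2int_sketch(weight, per_channel_scale, zero_point).shape)  # (4, 3, 3, 3)
```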
@@ -454,19 +454,20 @@ def export(net, *inputs, file_name, file_format='GEIR'):
     # export model
     net.init_parameters_data()
     if file_format == 'GEIR':
-        _executor.compile(net, *inputs, phase='export')
+        phase_name = 'export.geir'
+        _executor.compile(net, *inputs, phase=phase_name)
         _executor.export(net, file_name, file_format)
     elif file_format == 'ONNX':  # file_format is 'ONNX'
         # NOTICE: the phase name `export_onnx` is used for judging whether is exporting onnx in the compile pipeline,
         # do not change it to other values.
-        phase_name = 'export_onnx'
+        phase_name = 'export.onnx'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id)
         with open(file_name, 'wb') as f:
             os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
             f.write(onnx_stream)
     elif file_format == 'BINARY':  # file_format is 'BINARY'
-        phase_name = 'export_binary'
+        phase_name = 'export.binary'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir')
         with open(file_name, 'wb') as f:
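For reference, a hedged sketch of the caller-side API affected by the new dotted phase names: callers still only pass `file_format`, and the compiler phase ('export.geir', 'export.onnx' or 'export.binary') is chosen internally as shown above. The placeholder network and file names are assumptions for illustration.

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.train import serialization

net = nn.Dense(3, 2)                                  # placeholder network for illustration
x = Tensor(np.ones((1, 3)).astype(np.float32))
serialization.export(net, x, file_name="net.onnx", file_format='ONNX')   # compiled with phase 'export.onnx'
serialization.export(net, x, file_name="net.bin", file_format='BINARY')  # compiled with phase 'export.binary'
```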
@@ -2180,36 +2180,36 @@ test_case_other_ops = [
 ]

 test_case_quant_ops = [
-    ('AscendQuant_1', {
-        'block': inner.AscendQuant(0.5, 0.0, False, "Round"),
+    ('Quant_1', {
+        'block': inner.Quant(0.5, 0.0, False, "Round"),
         'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_2', {
-        'block': inner.AscendQuant(80.0, 10.0, True, "Round"),
+    ('Quant_2', {
+        'block': inner.Quant(80.0, 10.0, True, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_3', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Floor"),
+    ('Quant_3', {
+        'block': inner.Quant(80.0, 0.0, False, "Floor"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_4', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"),
+    ('Quant_4', {
+        'block': inner.Quant(80.0, 0.0, False, "Ceil"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_5', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"),
+    ('Quant_5', {
+        'block': inner.Quant(80.0, 0.0, False, "Trunc"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_6', {
-        'block': inner.AscendQuant(-80.0, 10.0, False, "Round"),
+    ('Quant_6', {
+        'block': inner.Quant(-80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_7', {
-        'block': inner.AscendQuant(80.0, -10.0, False, "Round"),
+    ('Quant_7', {
+        'block': inner.Quant(80.0, -10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_8', {
-        'block': inner.AscendQuant(80.0, 10.0, False, "Round"),
+    ('Quant_8', {
+        'block': inner.Quant(80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)],
         'skip': ['backward']}),
 ]
@@ -75,10 +75,20 @@ def test_qat_lenet():

 @pytest.mark.skip(reason="no `te.lang.cce` in ut env")
-def test_qat_mobile():
+def test_qat_mobile_per_channel_tf():
     network = mobilenetV2(num_classes=1000)
     img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
-    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False])
+    # should load the checkpoint. mock here
+    for param in network.get_parameters():
+        param.init_data()
+    qat.export(network, img, file_name="quant.pb")
+
+
+@pytest.mark.skip(reason="no `te.lang.cce` in ut env")
+def test_qat_mobile_per_channel_ff():
+    network = mobilenetV2(num_classes=1000)
+    img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False])
     # should load the checkpoint. mock here
     for param in network.get_parameters():
         param.init_data()