Merge pull request !3130 from vlne-v1/quant_op_depthwisetags/v0.6.0-beta
| @@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) { | |||
| MS_LOG(INFO) << "End save compiled func graph!"; | |||
| } | |||
| bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const { | |||
| std::string phase_prefix = GetPhasePrefix(phase_s); | |||
| if (use_vm && phase_prefix == "export") { | |||
| MS_LOG(INFO) << "Use ge backend to export geir"; | |||
| use_vm = false; | |||
| } | |||
| return use_vm; | |||
| } | |||
| void ExecutorPy::GetGeBackendPolicy() const { | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| @@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const { | |||
| } | |||
| } | |||
| bool IsPhaseExportGeir(const std::string &phase_s) { | |||
| auto phase_to_export = "export.geir"; | |||
| return phase_s.rfind(phase_to_export, 0) != std::string::npos; | |||
| } | |||
| std::vector<ActionItem> GetPipline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) { | |||
| bool is_geir = IsPhaseExportGeir(phase_s); | |||
| std::string backend = MsContext::GetInstance()->backend_policy(); | |||
| #if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES) | |||
| if (mindspore::parallel::ps::Util::IsParamServerMode()) { | |||
| mindspore::parallel::ps::Util::SetInternalEnvVar(); | |||
| } | |||
| if (parallel::ps::Util::IsRoleOfPServer()) { | |||
| resource->results()[kBackend] = compile::CreateBackend(); | |||
| return PServerPipeline(); | |||
| } | |||
| if (parallel::ps::Util::IsRoleOfScheduler()) { | |||
| return PSchedulerPipeline(); | |||
| } | |||
| #endif | |||
| if (use_vm && backend != "ge" && !is_geir) { | |||
| // Create backend and session | |||
| auto backend_ptr = compile::CreateBackend(); | |||
| // Connect session to debugger | |||
| backend_ptr->SetDebugger(); | |||
| resource->results()[kBackend] = backend_ptr; | |||
| return VmPipeline(); | |||
| } | |||
| return GePipeline(); | |||
| } | |||
| bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) { | |||
| MS_LOG(DEBUG) << "Start ExecutorPy compile!"; | |||
| if ((!py::isinstance<py::str>(phase))) { | |||
| @@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons | |||
| std::string phase_s = py::cast<std::string>(phase); | |||
| MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!"; | |||
| ResourcePtr resource = std::make_shared<Resource>(obj); | |||
| std::vector<ActionItem> p_actions; | |||
| use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s); | |||
| std::string backend = MsContext::GetInstance()->backend_policy(); | |||
| #if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES) | |||
| if (mindspore::parallel::ps::Util::IsParamServerMode()) { | |||
| mindspore::parallel::ps::Util::SetInternalEnvVar(); | |||
| } | |||
| if (parallel::ps::Util::IsRoleOfPServer()) { | |||
| resource->results()[kBackend] = compile::CreateBackend(); | |||
| p_actions = PServerPipeline(); | |||
| } else if (parallel::ps::Util::IsRoleOfScheduler()) { | |||
| p_actions = PSchedulerPipeline(); | |||
| } else if (use_vm && backend != "ge") { | |||
| // Create backend and session | |||
| auto backend_ptr = compile::CreateBackend(); | |||
| // Connect session to debugger | |||
| backend_ptr->SetDebugger(); | |||
| resource->results()[kBackend] = backend_ptr; | |||
| p_actions = VmPipeline(); | |||
| } else { | |||
| p_actions = GePipeline(); | |||
| } | |||
| #else | |||
| if (use_vm && backend != "ge") { | |||
| // Create backend and session | |||
| auto backend_ptr = compile::CreateBackend(); | |||
| // Connect session to debugger | |||
| backend_ptr->SetDebugger(); | |||
| resource->results()[kBackend] = backend_ptr; | |||
| p_actions = VmPipeline(); | |||
| } else { | |||
| p_actions = GePipeline(); | |||
| } | |||
| #endif | |||
| auto p_actions = GetPipline(resource, phase_s, use_vm); | |||
| std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s)); | |||
| // get the parameters items and add the value to args_spec | |||
| @@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons | |||
| } | |||
| std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) { | |||
| // phase does not contain 'export_onnx' | |||
| if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) { | |||
| // filter action after validate when 'export'. | |||
| if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) { | |||
| return actions; | |||
| } | |||
| MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'"; | |||
| @@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> { | |||
| private: | |||
| ExecutorPy(); | |||
| void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors); | |||
| bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const; | |||
| void GetGeBackendPolicy() const; | |||
| // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after | |||
| // 'validate' stage | |||
| @@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss"; | |||
| const char kNameCTCLoss[] = "CTCLoss"; | |||
| const char kNameRange[] = "Range"; | |||
| const char kNameSquareSumAll[] = "SquareSumAll"; | |||
| const char kNameAscendQuant[] = "AscendQuant"; | |||
| const char kNameAscendDequant[] = "AscendDequant"; | |||
| const char kNameAscendQuant[] = "Quant"; | |||
| const char kNameAscendDequant[] = "Dequant"; | |||
| const char kNameCase[] = "Case"; | |||
| // -----------------OpAdapter initialization-------------- | |||
| @@ -1107,7 +1107,7 @@ class QuantBlock(Cell): | |||
| r""" | |||
| A quant block of Conv/Dense, activation layer for Ascend deploy. | |||
| Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant. | |||
| Calculate Conv or Dense in Int8, with Quant and DeQuant. | |||
| Notes: | |||
| This block is only for deploy, and not trainable. | |||
| @@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer): | |||
| return x_dtype | |||
| class AscendQuant(PrimitiveWithInfer): | |||
| class Quant(PrimitiveWithInfer): | |||
| r""" | |||
| Returns the quantized value of input_x. | |||
| @@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> input_x = Tensor([100.0, 150.0], mstype.float32) | |||
| >>> quant = P.AscendQuant(80.0, 0.0, False, "Round") | |||
| >>> quant = P.Quant(80.0, 0.0, False, "Round") | |||
| >>> y = quant(input_x) | |||
| """ | |||
| @@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer): | |||
| return mstype.int8 | |||
| class AscendDequant(PrimitiveWithInfer): | |||
| class Dequant(PrimitiveWithInfer): | |||
| r""" | |||
| Returns the dequantized value of input_x. | |||
| This operation will do ReLU to the dequantized value if `relu_flag` is True. | |||
| @@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> input_x = Tensor([100.0, 150.0], mstype.float32) | |||
| >>> dequant = P.AscendDequant(False, False) | |||
| >>> dequant = P.Dequant(False, False) | |||
| >>> y = dequant(input_x) | |||
| """ | |||
| @prim_attr_register | |||
| @@ -329,14 +329,14 @@ class ExportToQuantInferNetwork: | |||
| return None | |||
| # Build the `Quant` `Dequant` op. | |||
| # AscendQuant only support perlayer version. Need check here. | |||
| quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in)) | |||
| # Quant only supports the per-layer version. Need to check here. | |||
| quant_op = inner.Quant(float(scale_a_in), float(zp_a_in)) | |||
| sqrt_mode = False | |||
| scale_deq = scale_a_out * scale_w | |||
| if (scale_deq < 2 ** -14).all(): | |||
| scale_deq = np.sqrt(scale_deq) | |||
| sqrt_mode = True | |||
| dequant_op = inner.AscendDequant(sqrt_mode) | |||
| dequant_op = inner.Dequant(sqrt_mode) | |||
| # get op | |||
| op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv | |||
| @@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format=' | |||
| file_name (str): File name of model to export. | |||
| mean (int): Input data mean. Default: 127.5. | |||
| std_dev (int, float): Input data variance. Default: 127.5. | |||
| file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model. | |||
| - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model. | |||
| file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported | |||
| quantization aware model. Default: 'GEIR'. | |||
| - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of | |||
| Ascend model. | |||
| - BINARY: Binary format for model. An intermediate representation format for models. | |||
| """ | |||
| supported_device = ["Ascend"] | |||
| supported_formats = ['GEIR'] | |||
| supported_formats = ['GEIR', 'BINARY'] | |||
| mean = validator.check_type("mean", mean, (int, float)) | |||
| std_dev = validator.check_type("std_dev", std_dev, (int, float)) | |||
| @@ -428,10 +432,9 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format=' | |||
| network.set_train(False) | |||
| if file_format == 'GEIR': | |||
| exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) | |||
| deploy_net = exporter.run() | |||
| serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) | |||
| exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) | |||
| deploy_net = exporter.run() | |||
| serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) | |||
| def convert_quant_network(network, | |||
| @@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point): | |||
| raise ValueError("`scale` and `zero_point` should have the same shape.") | |||
| if scale.shape[0] < 0: | |||
| raise ValueError("`scale` and `zero_point` shape should greater than zero.") | |||
| if len(scale.shape) > 1: | |||
| if len(scale.shape) >= 1 and scale.shape[0] > 1: | |||
| # for perchannel | |||
| if scale.shape[0] == data.shape[0]: | |||
| # `Conv2d` or `Dense` op weight | |||
| @@ -454,19 +454,20 @@ def export(net, *inputs, file_name, file_format='GEIR'): | |||
| # export model | |||
| net.init_parameters_data() | |||
| if file_format == 'GEIR': | |||
| _executor.compile(net, *inputs, phase='export') | |||
| phase_name = 'export.geir' | |||
| _executor.compile(net, *inputs, phase=phase_name) | |||
| _executor.export(net, file_name, file_format) | |||
| elif file_format == 'ONNX': # file_format is 'ONNX' | |||
| # NOTICE: the phase name `export_onnx` is used for judging whether onnx is being exported in the compile pipeline, | |||
| # do not change it to other values. | |||
| phase_name = 'export_onnx' | |||
| phase_name = 'export.onnx' | |||
| graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) | |||
| onnx_stream = _executor._get_func_graph_proto(graph_id) | |||
| with open(file_name, 'wb') as f: | |||
| os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) | |||
| f.write(onnx_stream) | |||
| elif file_format == 'BINARY': # file_format is 'BINARY' | |||
| phase_name = 'export_binary' | |||
| phase_name = 'export.binary' | |||
| graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False) | |||
| onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir') | |||
| with open(file_name, 'wb') as f: | |||
| @@ -2180,36 +2180,36 @@ test_case_other_ops = [ | |||
| ] | |||
| test_case_quant_ops = [ | |||
| ('AscendQuant_1', { | |||
| 'block': inner.AscendQuant(0.5, 0.0, False, "Round"), | |||
| ('Quant_1', { | |||
| 'block': inner.Quant(0.5, 0.0, False, "Round"), | |||
| 'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_2', { | |||
| 'block': inner.AscendQuant(80.0, 10.0, True, "Round"), | |||
| ('Quant_2', { | |||
| 'block': inner.Quant(80.0, 10.0, True, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_3', { | |||
| 'block': inner.AscendQuant(80.0, 0.0, False, "Floor"), | |||
| ('Quant_3', { | |||
| 'block': inner.Quant(80.0, 0.0, False, "Floor"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_4', { | |||
| 'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"), | |||
| ('Quant_4', { | |||
| 'block': inner.Quant(80.0, 0.0, False, "Ceil"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_5', { | |||
| 'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"), | |||
| ('Quant_5', { | |||
| 'block': inner.Quant(80.0, 0.0, False, "Trunc"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_6', { | |||
| 'block': inner.AscendQuant(-80.0, 10.0, False, "Round"), | |||
| ('Quant_6', { | |||
| 'block': inner.Quant(-80.0, 10.0, False, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_7', { | |||
| 'block': inner.AscendQuant(80.0, -10.0, False, "Round"), | |||
| ('Quant_7', { | |||
| 'block': inner.Quant(80.0, -10.0, False, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)], | |||
| 'skip': ['backward']}), | |||
| ('AscendQuant_8', { | |||
| 'block': inner.AscendQuant(80.0, 10.0, False, "Round"), | |||
| ('Quant_8', { | |||
| 'block': inner.Quant(80.0, 10.0, False, "Round"), | |||
| 'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)], | |||
| 'skip': ['backward']}), | |||
| ] | |||
| @@ -75,10 +75,20 @@ def test_qat_lenet(): | |||
| @pytest.mark.skip(reason="no `te.lang.cce` in ut env") | |||
| def test_qat_mobile(): | |||
| def test_qat_mobile_per_channel_tf(): | |||
| network = mobilenetV2(num_classes=1000) | |||
| img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) | |||
| network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) | |||
| network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False]) | |||
| # should load the checkpoint. mock here | |||
| for param in network.get_parameters(): | |||
| param.init_data() | |||
| qat.export(network, img, file_name="quant.pb") | |||
| @pytest.mark.skip(reason="no `te.lang.cce` in ut env") | |||
| def test_qat_mobile_per_channel_ff(): | |||
| network = mobilenetV2(num_classes=1000) | |||
| img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) | |||
| network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False]) | |||
| # should load the checkpoint. mock here | |||
| for param in network.get_parameters(): | |||
| param.init_data() | |||