@@ -99,7 +99,9 @@ class SummaryCollector(Callback):
             - collect_eval_lineage (bool): Whether to collect lineage data for the evaluation phase,
               this field will be displayed on the lineage page of Mindinsight. Optional: True/False. Default: True.
             - collect_input_data (bool): Whether to collect dataset for each training.
-              Currently only image data is supported. Optional: True/False. Default: True.
+              Currently only image data is supported.
+              If there are multiple columns of data in the dataset, the first column should be image data.
+              Optional: True/False. Default: True.
             - collect_dataset_graph (bool): Whether to collect dataset graph for the training phase.
               Optional: True/False. Default: True.
             - histogram_regular (Union[str, None]): Collect weight and bias for parameter distribution page
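# Usage sketch for the `collect_input_data` switch documented above, assuming the
# public SummaryCollector constructor; the directory name and the commented-out
# training call are placeholders.
from mindspore.train.callback import SummaryCollector

summary_collector = SummaryCollector(
    summary_dir='./summary_dir',
    collect_specified_data={'collect_input_data': True})  # first dataset column should be image data
# model.train(1, ds_train, callbacks=[summary_collector], dataset_sink_mode=False)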
@@ -122,7 +124,7 @@ class SummaryCollector(Callback):
             Default: None, which means to follow the behavior as described above. For example, given `collect_freq=10`,
             when the total steps is 600, TensorSummary will be collected 20 steps, while other summary data 61 steps,
             but when the total steps is 20, both TensorSummary and other summary will be collected 3 steps.
-            Also note that when in parallel mode, the total steps will be splitted evenly, which will
+            Also note that when in parallel mode, the total steps will be split evenly, which will
             affect the number of steps TensorSummary will be collected.
         max_file_size (Optional[int]): The maximum size in bytes of each file that can be written to the disk.
             Default: None, which means no limit. For example, to write not larger than 4GB,
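# Usage sketch for the collect_tensor_freq / max_file_size knobs described above,
# assuming the standard constructor arguments; the concrete numbers are illustrative.
from mindspore.train.callback import SummaryCollector

summary_collector = SummaryCollector(
    summary_dir='./summary_dir',
    collect_freq=10,              # most summary data every 10 steps
    collect_tensor_freq=50,       # TensorSummary follows its own, sparser schedule
    max_file_size=4 * 1024 ** 3)  # cap each summary file at 4GB, matching the example above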
@@ -479,17 +481,21 @@ class SummaryCollector(Callback):
         if not self._collect_specified_data.get('collect_input_data'):
             return

-        if self._dataset_sink_mode and context.get_context('device_target') == 'Ascend':
+        input_data = getattr(cb_params, 'train_dataset_element', None)
+        if not isinstance(input_data, (Tensor, list, tuple)):
             self._collect_specified_data['collect_input_data'] = False
-            logger.warning('On Ascend device, SummaryCollector is not supported to record input data '
-                           'in dataset sink mode.')
+            logger.warning("The type of input data is not Tensor/list/tuple, "
+                           "so SummaryCollector will not collect input data.")
             return

-        input_data = getattr(cb_params, 'train_dataset_element', None)
-        if input_data is None:
+        if not isinstance(input_data, Tensor) and not input_data:
             self._collect_specified_data['collect_input_data'] = False
-            logger.info("The 'train_dataset_element' in cb_params is None, "
-                        "so 'SummaryCollector' will not record the input data.")
+            logger.warning("The 'train_dataset_element' in cb_params is empty, "
+                           "so SummaryCollector will not record the input data.")
+
+        if self._dataset_sink_mode and context.get_context('device_target') == 'Ascend':
+            logger.warning('On Ascend device, SummaryCollector is not supported to record input data '
+                           'in dataset sink mode.')
             return

         if isinstance(input_data, (list, tuple)) and input_data:
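# Illustrative restatement of the new input-data guard as a free function, so the
# two checks can be read in isolation; the function name is hypothetical and the
# real logic lives in SummaryCollector._collect_input_data.
import numpy as np
from mindspore import Tensor

def input_data_is_collectable(input_data):
    """Reject unsupported types first, then empty list/tuple containers."""
    if not isinstance(input_data, (Tensor, list, tuple)):
        return False   # e.g. None when 'train_dataset_element' is missing
    if not isinstance(input_data, Tensor) and not input_data:
        return False   # empty list/tuple: nothing to record
    return True

print(input_data_is_collectable(Tensor(np.ones((1, 3, 224, 224), np.float32))))  # True
print(input_data_is_collectable([]))                                             # False
print(input_data_is_collectable(None))                                           # False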
@@ -522,6 +528,8 @@ class SummaryCollector(Callback):
         network = cb_params.train_network if cb_params.mode == ModeEnum.TRAIN.value else cb_params.eval_network
         graph_proto = network.get_func_graph_proto()
         if graph_proto is None:
+            logger.warning("Can not get graph proto, it may not be 'GRAPH_MODE' in context currently, "
+                           "so SummaryCollector will not collect graph.")
             return

         self._record.add_value(PluginEnum.GRAPH.value, 'train_network/auto', graph_proto)
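# The warning added above fires when no func graph proto is available, which
# typically happens outside graph mode; a sketch of selecting graph mode up front,
# using the standard context API.
from mindspore import context

context.set_context(mode=context.GRAPH_MODE)  # graph proto is only produced in graph mode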
@@ -538,7 +546,7 @@ class SummaryCollector(Callback):
         try:
             self._record.add_value(PluginEnum.SCALAR.value, 'loss/auto', loss)
         except ValueError:
-            logger.warning("The output of network is not a scalar, so will not collect loss in SummaryCollector.")
+            logger.warning("The output of network is not a scalar, so SummaryCollector will not collect loss.")
             self._collect_specified_data['collect_metric'] = False

     def _get_loss(self, cb_params):
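# The reworded warning above is hit when the value recorded as 'loss/auto' is not a
# scalar; a sketch of a loss that does reduce to a scalar (shapes and values are
# illustrative only).
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')  # 'mean' -> scalar
logits = Tensor(np.random.randn(4, 10).astype(np.float32))
labels = Tensor(np.array([1, 0, 3, 2], dtype=np.int32))
print(loss_fn(logits, labels))  # a single scalar, acceptable to the SCALAR plugin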
@@ -557,7 +565,7 @@ class SummaryCollector(Callback):

         output = cb_params.net_outputs
         if output is None:
-            logger.warning("Can not find any output by this network, so will not collect loss in SummaryCollector.")
+            logger.warning("Can not find any output by this network, so SummaryCollector will not collect loss.")
             self._is_parse_loss_success = False
             return None
