| @@ -1216,9 +1216,8 @@ class Cell(Cell_): | |||
| """ | |||
| Add cast on all inputs of cell and child cells to run with certain float type. | |||
| If `dst_type is mindspore.dtype.float16`, all the inputs of Cell including input, Parameter, Tensor | |||
| as const will be cast to float16. Please refer to the usage in source code of | |||
| `mindspore.train.amp.build_train_network`. | |||
| If `dst_type` is `mindspore.dtype.float16`, all the inputs of Cell, including input, Parameter and Tensor, will | |||
| be cast to float16. Please refer to the usage in source code of :func:`mindspore.build_train_network`. | |||
| Note: | |||
| Multiple calls will overwrite. | |||
| @@ -1232,6 +1231,13 @@ class Cell(Cell_): | |||
| Raises: | |||
| ValueError: If dst_type is not float32 or float16. | |||
| Examples: | |||
| >>> import mindspore.nn as nn | |||
| >>> from mindspore import dtype as mstype | |||
| >>> | |||
| >>> net = nn.Conv2d(120, 240, 4, has_bias=False, weight_init='normal') | |||
| >>> net.to_float(mstype.float16) | |||
| """ | |||
| if dst_type not in (mstype.float16, mstype.float32): | |||
| raise ValueError("The dst_type should inside float32 or float16.") | |||
| @@ -95,7 +95,7 @@ __factory__ = { | |||
| def names(): | |||
| """ | |||
| Gets the names of the metric methods. | |||
| Gets all names of the metric methods. | |||
| Returns: | |||
| List, the name list of metric methods. | |||
| @@ -182,8 +182,6 @@ class ConfusionMatrixMetric(Metric): | |||
| >>> x = Tensor(np.array([[[0], [1]], [[1], [0]]])) | |||
| >>> y = Tensor(np.array([[[0], [1]], [[0], [1]]])) | |||
| >>> metric.update(x, y) | |||
| >>> x = Tensor(np.array([[[0], [1]], [[1], [0]]])) | |||
| >>> y = Tensor(np.array([[[0], [1]], [[1], [0]]])) | |||
| >>> avg_output = metric.eval() | |||
| >>> print(avg_output) | |||
| [0.5] | |||
| @@ -287,7 +287,8 @@ class Optimizer(Cell): | |||
| """ | |||
| Weight decay. | |||
| An approach to reduce the overfitting of a deep learning neural network model. | |||
| An approach to reduce the overfitting of a deep learning neural network model. User-defined optimizers based | |||
| on :class:`mindspore.nn.Optimizer` can also call this interface to apply weight decay. | |||
| Args: | |||
| gradients (tuple[Tensor]): The gradients of `self.parameters`, and have the same shape as | |||
| @@ -311,8 +312,9 @@ class Optimizer(Cell): | |||
| """ | |||
| Gradients centralization. | |||
| A method for optimizing convolutional layer parameters to impore the training speed of a deep learning neural | |||
| network model. | |||
| A method for optimizing convolutional layer parameters to improve the training speed of a deep learning neural | |||
| network model. User-defined optimizers based on :class:`mindspore.nn.Optimizer` can also call this interface to | |||
| centralize gradients. | |||
| Args: | |||
| gradients (tuple[Tensor]): The gradients of `self.parameters`, and have the same shape as | |||
| @@ -331,7 +333,8 @@ class Optimizer(Cell): | |||
| Loss scale for mixed precision. | |||
| An approach of mixed precision training to improve the speed and energy efficiency of training deep neural | |||
| network. | |||
| network. User-defined optimizers based on :class:`mindspore.nn.Optimizer` can also call this interface to | |||
| restore gradients. | |||
| Args: | |||
| gradients (tuple[Tensor]): The gradients of `self.parameters`, and have the same shape as | |||
| @@ -546,7 +549,8 @@ class Optimizer(Cell): | |||
| def get_lr(self): | |||
| """ | |||
| Get the learning rate of current step. | |||
| The optimizer calls this interface to get the learning rate for the current step. User-defined optimizers based | |||
| on :class:`mindspore.nn.Optimizer` can also call this interface before updating the parameters. | |||
| Returns: | |||
| float, the learning rate of current step. | |||
| @@ -566,13 +570,27 @@ class Optimizer(Cell): | |||
| def get_lr_parameter(self, param): | |||
| """ | |||
| Get the learning rate of parameter. | |||
| When the parameters are grouped and the learning rate differs for each group, get the learning rate of the | |||
| specified `param`. | |||
| Args: | |||
| param (Union[Parameter, list[Parameter]]): The `Parameter` or list of `Parameter`. | |||
| Returns: | |||
| Parameter, single `Parameter` or `list[Parameter]` according to the input type. | |||
| Parameter, single `Parameter` or `list[Parameter]` according to the input type. If the learning rate is dynamic, | |||
| the `Cell` or `list[Cell]` that is used to calculate the learning rate will be returned. | |||
| Examples: | |||
| >>> from mindspore import nn | |||
| >>> net = Net() | |||
| >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params())) | |||
| >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params())) | |||
| >>> group_params = [{'params': conv_params, 'lr': 0.05}, | |||
| ... {'params': no_conv_params, 'lr': 0.01}] | |||
| >>> optim = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9, weight_decay=0.0) | |||
| >>> conv_lr = optim.get_lr_parameter(conv_params) | |||
| >>> print(conv_lr[0].asnumpy()) | |||
| 0.05 | |||
| """ | |||
| def get_lr_value(learning_rate): | |||
| if isinstance(learning_rate, (_ConvertToCell, _IteratorLearningRate)): | |||
| @@ -592,7 +592,7 @@ class WithEvalCell(Cell): | |||
| Args: | |||
| network (Cell): The network Cell. | |||
| loss_fn (Cell): The loss Cell. | |||
| add_cast_fp32 (bool): Adjust the data type to float32. | |||
| add_cast_fp32 (bool): Adjust the data type to float32. Default: False. | |||
| Inputs: | |||
| - **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`. | |||
| @@ -136,6 +136,9 @@ class DynamicLossScaleUpdateCell(Cell): | |||
| def get_loss_scale(self): | |||
| """ | |||
| Get Loss Scale value. | |||
| Returns: | |||
| float, the loss scale value. | |||
| """ | |||
| return self.loss_scale_value | |||
| @@ -210,6 +213,9 @@ class FixedLossScaleUpdateCell(Cell): | |||
| def get_loss_scale(self): | |||
| """ | |||
| Get Loss Scale value. | |||
| Returns: | |||
| float, the loss scale value. | |||
| """ | |||
| return self.loss_scale_value | |||
| @@ -286,9 +292,14 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): | |||
| >>> net_with_loss = nn.WithLossCell(net, loss) | |||
| >>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32)) | |||
| >>> label = Tensor(np.zeros([size, out_features]).astype(np.float32)) | |||
| >>> scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32) | |||
| >>> scaling_sens = Tensor([1024], dtype=mstype.float32) | |||
| >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=scaling_sens) | |||
| >>> output = train_network(inputs, label) | |||
| >>> | |||
| >>> # update scaling sens and train the network | |||
| >>> scaling_sens = Tensor([1], dtype=mstype.float32) | |||
| >>> train_network.set_sense_scale(scaling_sens) | |||
| >>> output = train_network(inputs, label) | |||
| """ | |||
| def __init__(self, network, optimizer, scale_sense): | |||
| super(TrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None) | |||
| @@ -353,7 +364,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): | |||
| Specify the argument 'pre_cond' and 'compute_input' to make sure overflow status is cleared at the right time. | |||
| Taking this situation as an example, we need to execute state clearing after loss calculation and then detect | |||
| overflow in the process of gradient calculation. In this case, pre_cond should be the output of the loss | |||
| function, and compute_input should be the input of gradients-computing function. | |||
| function, and compute_input should be the input of gradients-computing function. User-defined training network | |||
| based on this class can also call this interface to process the overflow. | |||
| Args: | |||
| - **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order | |||
| @@ -383,7 +395,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): | |||
| """ | |||
| Get floating-point overflow status. | |||
| Get overflow results after executing the target process for overflow detection. | |||
| Get overflow results after executing the target process for overflow detection. User-defined training network | |||
| based on this class can also call this interface to process the overflow. | |||
| Args: | |||
| - **status** (object) - A status instance used to detect the overflow. | |||
| @@ -418,6 +431,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): | |||
| """ | |||
| Calculate loss scale according to the overflow. | |||
| User-defined training network based on this class can also call this interface to process the overflow. | |||
| Args: | |||
| - **overflow** (bool) - Whether the overflow occurs or not. | |||