From: @wangshuide2020
Reviewed-by: @liangchenghui, @wuxuejian
Signed-off-by: @liangchenghui
Tag: tags/v1.1.0
@@ -34,7 +34,7 @@ abs_ = P.Abs()
 def mean(x, axis=(), keep_dims=False):
     """
-    Reduce a dimension of a tensor by averaging all elements in the dimension.
+    Reduces a dimension of a tensor by averaging all elements in the dimension.

     Args:
         axis (Union[None, int, tuple(int)]): Dimensions of reduction,
@@ -338,7 +338,7 @@ class Tensor(Tensor_):
     def mean(self, axis=(), keep_dims=False):
         """
-        Reduce a dimension of a tensor by averaging all elements in the dimension.
+        Reduces a dimension of a tensor by averaging all elements in the dimension.

         Args:
             axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction,
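Both docstrings describe the same reduction semantics. A minimal usage sketch of the Tensor method (assuming the v1.1 signature shown above; the printed values are what the averaging reduction implies):

    import numpy as np
    from mindspore import Tensor

    x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
    print(x.mean())                        # all elements averaged -> 2.5
    print(x.mean(axis=0))                  # reduce axis 0 -> [2.0, 3.0]
    print(x.mean(axis=0, keep_dims=True))  # keep the reduced axis -> shape (1, 2)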
@@ -131,21 +131,21 @@ def matmul(inputs_x: Tensor, inputs_y: Tensor) -> Tensor:
 def maximum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by the maximum value in this dimension."""
+    """Reduces a dimension of a tensor by the maximum value in this dimension."""
     max_op = op.ReduceMax(keep_dims)
     outputs = max_op(inputs, axis)
     return outputs


 def minimum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by the minimum value in the dimension."""
+    """Reduces a dimension of a tensor by the minimum value in the dimension."""
     max_op = op.ReduceMin(keep_dims)
     outputs = max_op(inputs, axis)
     return outputs


 def mean(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by averaging all elements in the dimension."""
+    """Reduces a dimension of a tensor by averaging all elements in the dimension."""
     mean_op = op.ReduceMean(keep_dims)
     outputs = mean_op(inputs, axis)
     return outputs
@@ -243,7 +243,7 @@ def softmax(axis: int = -1) -> Callable:
 def summation(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by summing all elements in the dimension."""
+    """Reduces a dimension of a tensor by summing all elements in the dimension."""
     sum_op = op.ReduceSum(keep_dims)
     outputs = sum_op(inputs, axis)
     return outputs
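These helpers are thin wrappers over the reduce primitives, which can also be used directly. A minimal sketch (assuming the `operations` alias `op` used in this file; `keep_dims` is fixed at construction time, the axis is passed at call time):

    import numpy as np
    from mindspore import Tensor
    from mindspore.ops import operations as op

    x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
    sum_op = op.ReduceSum(False)
    print(sum_op(x, (0,)))   # sum over axis 0 -> [4.0, 6.0]
    max_op = op.ReduceMax(True)
    print(max_op(x, (1,)))   # max over axis 1, axis kept -> [[2.0], [4.0]]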
@@ -15,7 +15,7 @@
 """
 Neural Networks Cells.

-Pre-defined building blocks or computing units to construct Neural Networks.
+Pre-defined building blocks or computing units to construct neural networks.
 """
 from . import layer, loss, optim, metrics, wrap, probability, sparse, dynamic_lr
 from .learning_rate_schedule import *
@@ -913,7 +913,7 @@ class MatrixDiagPart(Cell):
 class MatrixSetDiag(Cell):
     r"""
-    Modify the batched diagonal part of a batched tensor.
+    Modifies the batched diagonal part of a batched tensor.

     Inputs:
         - **x** (Tensor) - The batched tensor. Rank k+1, where k >= 1. It can be one of the following data types:
@@ -30,9 +30,9 @@ __all__ = [
 class Conv2dBnAct(Cell):
     r"""
-    A combination of convolution, Batchnorm, activation layer.
+    A combination of convolution, Batchnorm, and activation layer.

-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.

     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -186,7 +186,7 @@ def _compute_multi_channel_loss(c1, c2, img1, img2, conv, concat, mean):
 class SSIM(Cell):
     r"""
-    Returns SSIM index between img1 and img2.
+    Returns SSIM index between two images.

     Its implementation is based on Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). `Image quality
     assessment: from error visibility to structural similarity <https://ieeexplore.ieee.org/document/1284395>`_.
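A minimal usage sketch of the cell (assuming NCHW float inputs normalized to [0, 1], the default constructor with max_val=1.0, and spatial size at least the default 11x11 filter):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    net = nn.SSIM()  # assumes default max_val=1.0
    img1 = Tensor(np.random.rand(1, 3, 16, 16).astype(np.float32))
    img2 = Tensor(np.random.rand(1, 3, 16, 16).astype(np.float32))
    print(net(img1, img2))  # one SSIM index per image in the batch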
@@ -266,7 +266,7 @@ def _downsample(img1, img2, op):
 class MSSSIM(Cell):
     r"""
-    Returns MS-SSIM index between img1 and img2.
+    Returns MS-SSIM index between two images.

     Its implementation is based on Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. `Multiscale structural similarity
     for image quality assessment <https://ieeexplore.ieee.org/document/1292216>`_.
@@ -43,7 +43,7 @@ def _check_input_dtype(input_dtype, param_name, allow_dtypes, cls_name):
 class LSTM(Cell):
     r"""
-    LSTM (Long Short-Term Memory) layer.
+    Stacked LSTM (Long Short-Term Memory) layers.

     Apply LSTM layer to the input.
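The new summary line emphasizes stacking via num_layers. A minimal sketch of stacked usage (assuming the standard nn.LSTM constructor and the (h, c) state tuple this layer documents):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    net = nn.LSTM(input_size=10, hidden_size=16, num_layers=2, batch_first=True)
    x = Tensor(np.ones([3, 5, 10]).astype(np.float32))    # (batch, seq_len, input_size)
    h0 = Tensor(np.zeros([2, 3, 16]).astype(np.float32))  # (num_layers, batch, hidden_size)
    c0 = Tensor(np.zeros([2, 3, 16]).astype(np.float32))
    output, (hn, cn) = net(x, (h0, c0))
    print(output.shape)  # (3, 5, 16)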
@@ -45,7 +45,7 @@ def _check_input_dtype(param_name, input_dtype, allow_dtypes, cls_name):
 class ReduceLogSumExp(Cell):
     r"""
-    Reduce a dimension of a tensor by calculating exponential for all elements in the dimension,
+    Reduces a dimension of a tensor by calculating exponential for all elements in the dimension,
     then calculate logarithm of the sum.

     The dtype of the tensor to be reduced is number.
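In other words, the cell computes log(sum(exp(x), axis)). A minimal sketch (assuming a `ReduceLogSumExp(axis, keep_dims)` constructor, which is not shown in this hunk):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
    logsumexp = nn.ReduceLogSumExp(axis=1, keep_dims=False)
    # equivalent to np.log(np.exp(x).sum(axis=1)): [log(e + e**2), log(e**3 + e**4)]
    print(logsumexp(x))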
@@ -158,7 +158,7 @@ class Range(Cell):
 class LGamma(Cell):
     r"""
-    Calculate LGamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
+    Calculates LGamma using Lanczos' approximation, referring to "A Precision Approximation of the Gamma Function".

     The algorithm is:

     .. math::
@@ -886,7 +886,7 @@ class MatMul(Cell):
 class Moments(Cell):
     """
-    Calculate the mean and variance of `x`.
+    Calculates the mean and variance of `x`.

     Args:
         axis (Union[int, tuple(int)]): Calculates the mean and variance along the specified axis. Default: ().
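A minimal sketch (assuming the cell returns a (mean, variance) pair, as its name suggests):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    net = nn.Moments(axis=0, keep_dims=False)
    x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
    mean, variance = net(x)  # mean: [2.0, 3.0], variance: [1.0, 1.0]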
@@ -62,7 +62,7 @@ def _shape_check(in_shape):
 class MaxPool2d(_PoolNd):
     r"""
-    Max pooling operation for temporal data.
+    2D max pooling operation for temporal data.

     Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.
@@ -139,7 +139,7 @@ class MaxPool2d(_PoolNd):
 class MaxPool1d(_PoolNd):
     r"""
-    Max pooling operation for temporal data.
+    1D max pooling operation for temporal data.

     Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes.
@@ -220,7 +220,7 @@ class MaxPool1d(_PoolNd):
 class AvgPool2d(_PoolNd):
     r"""
-    Average pooling for temporal data.
+    2D average pooling for temporal data.

     Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.
@@ -294,7 +294,7 @@ class AvgPool2d(_PoolNd):
 class AvgPool1d(_PoolNd):
     r"""
-    Average pooling for temporal data.
+    1D average pooling for temporal data.

     Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes.
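The four pooling cells share the same constructor pattern. A minimal sketch (assuming the default pad_mode='valid' and the NCHW / NCL layouts these docstrings describe):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)  # reduces H and W
    avg_pool1d = nn.AvgPool1d(kernel_size=2, stride=2)  # reduces L
    x2d = Tensor(np.random.rand(1, 3, 4, 4).astype(np.float32))  # NCHW
    x1d = Tensor(np.random.rand(1, 3, 6).astype(np.float32))     # NCL
    print(max_pool2d(x2d).shape)  # (1, 3, 2, 2)
    print(avg_pool1d(x1d).shape)  # (1, 3, 3)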
@@ -210,7 +210,7 @@ class UniformQuantObserver(_Observer):
 class FakeQuantWithMinMaxObserver(UniformQuantObserver):
     r"""
-    Quantization aware op. This OP provides the fake quantization observer function on data with min and max.
+    Quantization aware operation which provides the fake quantization observer function on data with min and max.

     Args:
         min_init (int, float): The initialized min value. Default: -6.
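For intuition, fake quantization rounds values onto the integer grid implied by [min, max] and immediately dequantizes them, so the float network sees the quantization error during training. A simplified NumPy illustration (not the MindSpore kernel; the affine scale/zero-point scheme here is an assumption for illustration only):

    import numpy as np

    def fake_quant(x, x_min, x_max, num_bits=8):
        # Map [x_min, x_max] onto the integer range, round, then map back.
        qmin, qmax = 0, 2 ** num_bits - 1
        scale = (x_max - x_min) / (qmax - qmin)
        zero_point = np.round(qmin - x_min / scale)
        q = np.clip(np.round(x / scale + zero_point), qmin, qmax)
        return (q - zero_point) * scale

    x = np.array([-7.0, -1.5, 0.0, 2.3, 6.8], dtype=np.float32)
    print(fake_quant(x, x_min=-6.0, x_max=6.0))  # clipped to [-6, 6], snapped to the 8-bit grid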
@@ -273,7 +273,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
         self.narrow_range = narrow_range
         self.is_ascend = context.get_context('device_target') == "Ascend"
-        # init tensor min and max for fake quant op
+        # init tensor min and max for fake quantized operation
         if self.per_channel:
             min_array = np.array([self.min_init] * self.num_channels).astype(np.float32)
             max_array = np.array([self.max_init] * self.num_channels).astype(np.float32)
@@ -335,9 +335,9 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio
 class Conv2dBnFoldQuantOneConv(Cell):
     r"""
-    2D convolution with BatchNormal op folded construct.
+    2D convolution which uses the convolution layer statistics once to calculate the BatchNormal operation folded construct.

-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.

     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -546,9 +546,9 @@ class Conv2dBnFoldQuantOneConv(Cell):
 class Conv2dBnFoldQuant(Cell):
     r"""
-    2D convolution with BatchNormal op folded construct.
+    2D convolution with BatchNormal operation folded construct.

-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.

     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -730,9 +730,9 @@ class Conv2dBnFoldQuant(Cell):
 class Conv2dBnWithoutFoldQuant(Cell):
     r"""
-    2D convolution + batchnorm without fold with fake quant construct.
+    2D convolution and batchnorm without fold, with fake quantized construct.

-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.

     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -844,9 +844,9 @@ class Conv2dBnWithoutFoldQuant(Cell):
 class Conv2dQuant(Cell):
     r"""
-    2D convolution with fake quant op layer.
+    2D convolution with fake quantized operation layer.

-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.

     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -953,9 +953,9 @@ class Conv2dQuant(Cell):
 class DenseQuant(Cell):
     r"""
-    The fully connected layer with fake quant op.
+    The fully connected layer with fake quantized operation.

-    This part is a more detailed overview of Dense op.
+    This part is a more detailed overview of Dense operation.

     Args:
         in_channels (int): The dimension of the input space.
@@ -1057,7 +1057,8 @@ class DenseQuant(Cell):
 class _QuantActivation(Cell):
     r"""
-    Base class for quantization aware training activation function. Add Fake Quant OP after activation OP.
+    Base class for quantization aware training activation function. Adds a fake quantized operation
+    after the activation operation.
     """

     def get_origin(self):
@@ -1068,14 +1069,14 @@ class ActQuant(_QuantActivation):
     r"""
     Quantization aware training activation function.

-    Add the fake quant op to the end of activation op, by which the output of activation op will be truncated.
-    Please check `FakeQuantWithMinMaxObserver` or other observer for more details.
+    Adds the fake quantized operation to the end of the activation operation, by which the output of the
+    activation operation will be truncated. Please check `FakeQuantWithMinMaxObserver` or other observers for details.

     Args:
         activation (Cell): Activation cell.
         ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
-        fake_before (bool): Whether add fake quant operation before activation. Default: False.
+        fake_before (bool): Whether to add fake quantized operation before activation. Default: False.
         quant_config (QuantConfig): Configs the observer types and quant configs of weight and activation. Default:
             both set to default FakeQuantWithMinMaxObserver.
         quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -1134,9 +1135,9 @@ class ActQuant(_QuantActivation):
 class TensorAddQuant(Cell):
     r"""
-    Add Fake Quant OP after TensorAdd OP.
+    Adds fake quantized operation after TensorAdd operation.

-    This part is a more detailed overview of TensorAdd op.
+    This part is a more detailed overview of TensorAdd operation.

     Args:
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
@@ -1185,9 +1186,9 @@ class TensorAddQuant(Cell):
 class MulQuant(Cell):
     r"""
-    Add Fake Quant OP after Mul OP.
+    Adds fake quantized operation after `Mul` operation.

-    This part is a more detailed overview of Mul op.
+    This part is a more detailed overview of `Mul` operation.

     Args:
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
@@ -66,7 +66,7 @@ class _Loss(Cell):
 class L1Loss(_Loss):
     r"""
-    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` by element,
+    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` element-wise,
     where :math:`x` is the input Tensor and :math:`y` is the target Tensor.

     For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
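A minimal numeric sketch of the element-wise MAE (assuming the default reduction='mean'):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    loss = nn.L1Loss()  # assumes default reduction='mean'
    x = Tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
    y = Tensor(np.array([1.0, 2.0, 2.0], dtype=np.float32))
    print(loss(x, y))  # mean(|x - y|) = (0 + 0 + 1) / 3, about 0.333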
@@ -114,7 +114,7 @@ class L1Loss(_Loss):
 class MSELoss(_Loss):
     r"""
     MSELoss creates a criterion to measure the mean squared error (squared L2-norm) between :math:`x` and :math:`y`
-    by element, where :math:`x` is the input and :math:`y` is the target.
+    element-wise, where :math:`x` is the input and :math:`y` is the target.

     For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
     the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as:
@@ -490,7 +490,7 @@ class SampledSoftmaxLoss(_Loss):
 class BCELoss(_Loss):
     r"""
-    BCELoss creates a criterion to measure the Binary Cross Entropy between the true labels and predicted labels.
+    BCELoss creates a criterion to measure the binary cross entropy between the true labels and predicted labels.

     Note:
         Set the predicted labels as :math:`x`, true labels as :math:`y`, the output loss as :math:`\ell(x, y)`.
@@ -465,9 +465,9 @@ class AdamWeightDecay(Optimizer):
 class AdamOffload(Optimizer):
     r"""
-    Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. This optimizer will offload Adam optimizer to
-    host CPU and keep parameters being updated on the device, to minimize the memory cost. Although that would bring
-    about an increase of performance overhead, the optimizer could be used to run a larger model.
+    This optimizer offloads the Adam optimizer computation to the host CPU while the parameters are still
+    updated on the device, to minimize device memory cost. Although this brings some performance overhead,
+    the optimizer can be used to run a larger model.

     The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
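A minimal sketch of swapping it in for a regular Adam optimizer (assuming AdamOffload mirrors nn.Adam's constructor, which this hunk does not show):

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    # Adam moment statistics live and update on the host CPU;
    # the parameters themselves stay on the device.
    optimizer = nn.AdamOffload(net.trainable_params(), learning_rate=1e-3)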
@@ -51,7 +51,7 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):
 class ProximalAdagrad(Optimizer):
     """
-    Implement the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
+    Implements the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.

     ProximalAdagrad is an online Learning and Stochastic Optimization.

     Refer to paper `Efficient Learning using Forward-Backward Splitting
@@ -33,7 +33,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, accum, s
 class SGD(Optimizer):
     r"""
-    Implements stochastic gradient descent (optionally with momentum).
+    Implements stochastic gradient descent. Momentum is optional.

     Introduction to SGD can be found at https://en.wikipedia.org/wiki/Stochastic_gradient_descent.

     Nesterov momentum is based on the formula from paper `On the importance of initialization and
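A minimal sketch of both modes, plain SGD and SGD with momentum (assuming the usual optimizer constructor pattern in this package):

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    # plain SGD
    opt_plain = nn.SGD(net.trainable_params(), learning_rate=0.1)
    # SGD with (optional) momentum
    opt_momentum = nn.SGD(net.trainable_params(), learning_rate=0.1, momentum=0.9)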
@@ -4306,7 +4306,7 @@ class KLDivLoss(PrimitiveWithInfer):
 class BinaryCrossEntropy(PrimitiveWithInfer):
     r"""
-    Computes the Binary Cross Entropy between the target and the output.
+    Computes the binary cross entropy between the target and the output.

     Note:
         Sets input as :math:`x`, input label as :math:`y`, output as :math:`\ell(x, y)`.
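For reference, the element-wise definition that this Note goes on to introduce is the standard (weighted) binary cross entropy:

    .. math::
        L = \{l_1, \dots, l_N\}^\top, \qquad
        l_n = -w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log(1 - x_n) \right]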