From abf0d30537c2510beb9419f88f35a060e5fce09a Mon Sep 17 00:00:00 2001
From: wangshuide2020 <7511764+wangshuide2020@user.noreply.gitee.com>
Date: Tue, 15 Dec 2020 14:32:19 +0800
Subject: [PATCH] update the doc string of some operations.

---
 mindspore/_extends/parse/standard_method.py | 2 +-
 mindspore/common/tensor.py | 2 +-
 mindspore/explainer/_operators.py | 8 ++--
 mindspore/nn/__init__.py | 2 +-
 mindspore/nn/layer/basic.py | 2 +-
 mindspore/nn/layer/combined.py | 4 +-
 mindspore/nn/layer/image.py | 4 +-
 mindspore/nn/layer/lstm.py | 2 +-
 mindspore/nn/layer/math.py | 6 +--
 mindspore/nn/layer/pooling.py | 8 ++--
 mindspore/nn/layer/quant.py | 41 +++++++++++----------
 mindspore/nn/loss/loss.py | 6 +--
 mindspore/nn/optim/adam.py | 6 +--
 mindspore/nn/optim/proximal_ada_grad.py | 2 +-
 mindspore/nn/optim/sgd.py | 2 +-
 mindspore/ops/operations/nn_ops.py | 2 +-
 16 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py
index 67a6daeb3d..8fb3d7f2f1 100644
--- a/mindspore/_extends/parse/standard_method.py
+++ b/mindspore/_extends/parse/standard_method.py
@@ -34,7 +34,7 @@ abs_ = P.Abs()
 
 def mean(x, axis=(), keep_dims=False):
     """
-    Reduce a dimension of a tensor by averaging all elements in the dimension.
+    Reduces a dimension of a tensor by averaging all elements in the dimension.
 
     Args:
         axis (Union[None, int, tuple(int)]): Dimensions of reduction,
diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py
index ad4167a272..0fe5aea748 100644
--- a/mindspore/common/tensor.py
+++ b/mindspore/common/tensor.py
@@ -338,7 +338,7 @@ class Tensor(Tensor_):
 
     def mean(self, axis=(), keep_dims=False):
         """
-        Reduce a dimension of a tensor by averaging all elements in the dimension.
+        Reduces a dimension of a tensor by averaging all elements in the dimension.
 
         Args:
             axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction,
diff --git a/mindspore/explainer/_operators.py b/mindspore/explainer/_operators.py
index bc84118121..76832937d1 100644
--- a/mindspore/explainer/_operators.py
+++ b/mindspore/explainer/_operators.py
@@ -131,21 +131,21 @@ def matmul(inputs_x: Tensor, inputs_y: Tensor) -> Tensor:
 
 
 def maximum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by the maximum value in this dimension."""
+    """Reduces a dimension of a tensor by the maximum value in this dimension."""
     max_op = op.ReduceMax(keep_dims)
     outputs = max_op(inputs, axis)
     return outputs
 
 
 def minimum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by the minimum value in the dimension."""
+    """Reduces a dimension of a tensor by the minimum value in the dimension."""
     max_op = op.ReduceMin(keep_dims)
     outputs = max_op(inputs, axis)
     return outputs
 
 
 def mean(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by averaging all elements in the dimension."""
+    """Reduces a dimension of a tensor by averaging all elements in the dimension."""
     mean_op = op.ReduceMean(keep_dims)
     outputs = mean_op(inputs, axis)
     return outputs
@@ -243,7 +243,7 @@ def softmax(axis: int = -1) -> Callable:
 
 
 def summation(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by summing all elements in the dimension."""
+    """Reduces a dimension of a tensor by summing all elements in the dimension."""
     sum_op = op.ReduceSum(keep_dims)
     outputs = sum_op(inputs, axis)
     return outputs
diff --git a/mindspore/nn/__init__.py b/mindspore/nn/__init__.py
index 19f70bf348..8e81b52267 100644
--- a/mindspore/nn/__init__.py
+++ b/mindspore/nn/__init__.py
@@ -15,7 +15,7 @@
 """
 Neural Networks Cells.
 
-Pre-defined building blocks or computing units to construct Neural Networks.
+Pre-defined building blocks or computing units to construct neural networks.
 """
 from . import layer, loss, optim, metrics, wrap, probability, sparse
 from .learning_rate_schedule import *
diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py
index 8a8ed81c5a..c7698f6d10 100644
--- a/mindspore/nn/layer/basic.py
+++ b/mindspore/nn/layer/basic.py
@@ -897,7 +897,7 @@ class MatrixDiagPart(Cell):
 
 class MatrixSetDiag(Cell):
     r"""
-    Modify the batched diagonal part of a batched tensor.
+    Modifies the batched diagonal part of a batched tensor.
 
     Inputs:
         - **x** (Tensor) - The batched tensor. Rank k+1, where k >= 1. It can be one of the following data types:
diff --git a/mindspore/nn/layer/combined.py b/mindspore/nn/layer/combined.py
index 8475c01122..fa8591897f 100644
--- a/mindspore/nn/layer/combined.py
+++ b/mindspore/nn/layer/combined.py
@@ -30,9 +30,9 @@ __all__ = [
 
 class Conv2dBnAct(Cell):
     r"""
-    A combination of convolution, Batchnorm, activation layer.
+    A combination of convolution, Batchnorm, and activation layers.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of the Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py
index 4af3e6fcf3..d693bd374e 100644
--- a/mindspore/nn/layer/image.py
+++ b/mindspore/nn/layer/image.py
@@ -186,7 +186,7 @@ def _compute_multi_channel_loss(c1, c2, img1, img2, conv, concat, mean):
 
 class SSIM(Cell):
     r"""
-    Returns SSIM index between img1 and img2.
+    Returns SSIM index between two images.
 
     Its implementation is based on Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). `Image quality
     assessment: from error visibility to structural similarity `_.
@@ -266,7 +266,7 @@ def _downsample(img1, img2, op):
 
 class MSSSIM(Cell):
     r"""
-    Returns MS-SSIM index between img1 and img2.
+    Returns MS-SSIM index between two images.
 
     Its implementation is based on Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. `Multiscale structural similarity
     for image quality assessment `_.
diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py
index 144e2d453d..9ea1fce82f 100755
--- a/mindspore/nn/layer/lstm.py
+++ b/mindspore/nn/layer/lstm.py
@@ -43,7 +43,7 @@ def _check_input_dtype(input_dtype, param_name, allow_dtypes, cls_name):
 
 class LSTM(Cell):
     r"""
-    LSTM (Long Short-Term Memory) layer.
+    Stacked LSTM (Long Short-Term Memory) layers.
 
     Apply LSTM layer to the input.
diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py
index ca0f0091aa..15a3100650 100644
--- a/mindspore/nn/layer/math.py
+++ b/mindspore/nn/layer/math.py
@@ -45,7 +45,7 @@ def _check_input_dtype(param_name, input_dtype, allow_dtypes, cls_name):
 
 class ReduceLogSumExp(Cell):
     r"""
-    Reduce a dimension of a tensor by calculating exponential for all elements in the dimension,
+    Reduces a dimension of a tensor by calculating exponential for all elements in the dimension,
     then calculate logarithm of the sum.
 
     The dtype of the tensor to be reduced is number.
@@ -158,7 +158,7 @@ class Range(Cell):
 
 class LGamma(Cell):
     r"""
-    Calculate LGamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
+    Calculates LGamma using Lanczos' approximation referring to "A Precision Approximation of the Gamma Function".
     The algorithm is:
 
     .. math::
@@ -886,7 +886,7 @@ class MatMul(Cell):
 
 class Moments(Cell):
     """
-    Calculate the mean and variance of `x`.
+    Calculates the mean and variance of `x`.
 
     Args:
         axis (Union[int, tuple(int)]): Calculates the mean and variance along the specified axis. Default: ().
diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py
index 9a4fd7cf2c..22841eca64 100644
--- a/mindspore/nn/layer/pooling.py
+++ b/mindspore/nn/layer/pooling.py
@@ -62,7 +62,7 @@ def _shape_check(in_shape):
 
 class MaxPool2d(_PoolNd):
     r"""
-    Max pooling operation for temporal data.
+    2D max pooling operation for temporal data.
 
     Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.
@@ -132,7 +132,7 @@ class MaxPool2d(_PoolNd):
 
 class MaxPool1d(_PoolNd):
     r"""
-    Max pooling operation for temporal data.
+    1D max pooling operation for temporal data.
 
     Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes.
@@ -206,7 +206,7 @@ class MaxPool1d(_PoolNd):
 
 class AvgPool2d(_PoolNd):
     r"""
-    Average pooling for temporal data.
+    2D average pooling for temporal data.
 
     Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.
@@ -280,7 +280,7 @@ class AvgPool2d(_PoolNd):
 
 class AvgPool1d(_PoolNd):
     r"""
-    Average pooling for temporal data.
+    1D average pooling for temporal data.
 
     Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes.
diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py
index 562f5f1c3d..870a911e6a 100644
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -210,7 +210,7 @@ class UniformQuantObserver(_Observer):
 
 class FakeQuantWithMinMaxObserver(UniformQuantObserver):
     r"""
-    Quantization aware op. This OP provides the fake quantization observer function on data with min and max.
+    Quantization aware operation which provides the fake quantization observer function on data with min and max.
 
     Args:
         min_init (int, float): The initialized min value. Default: -6.
@@ -273,7 +273,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
         self.narrow_range = narrow_range
         self.is_ascend = context.get_context('device_target') == "Ascend"
 
-        # init tensor min and max for fake quant op
+        # init tensor min and max for the fake quantized operation
        if self.per_channel:
            min_array = np.array([self.min_init] * self.num_channels).astype(np.float32)
            max_array = np.array([self.max_init] * self.num_channels).astype(np.float32)
@@ -335,9 +335,9 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio
 
 class Conv2dBnFoldQuantOneConv(Cell):
     r"""
-    2D convolution with BatchNormal op folded construct.
+    2D convolution which uses the convolution layer statistics once to calculate the BatchNormal operation folded construct.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of the Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -546,9 +546,9 @@ class Conv2dBnFoldQuantOneConv(Cell):
 
 class Conv2dBnFoldQuant(Cell):
     r"""
-    2D convolution with BatchNormal op folded construct.
+    2D convolution with BatchNormal operation folded construct.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of the Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -730,9 +730,9 @@ class Conv2dBnFoldQuant(Cell):
 
 class Conv2dBnWithoutFoldQuant(Cell):
     r"""
-    2D convolution + batchnorm without fold with fake quant construct.
+    2D convolution and batchnorm without fold, with fake quantized construct.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of the Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -844,9 +844,9 @@ class Conv2dBnWithoutFoldQuant(Cell):
 
 class Conv2dQuant(Cell):
     r"""
-    2D convolution with fake quant op layer.
+    2D convolution with fake quantized operation layer.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of the Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -953,9 +953,9 @@ class Conv2dQuant(Cell):
 
 class DenseQuant(Cell):
     r"""
-    The fully connected layer with fake quant op.
+    The fully connected layer with fake quantized operation.
 
-    This part is a more detailed overview of Dense op.
+    This part is a more detailed overview of the Dense operation.
 
     Args:
         in_channels (int): The dimension of the input space.
@@ -1057,7 +1057,8 @@ class DenseQuant(Cell):
 
 class _QuantActivation(Cell):
     r"""
-    Base class for quantization aware training activation function. Add Fake Quant OP after activation OP.
+    Base class for quantization aware training activation function. Adds the fake quantized operation
+    after the activation operation.
     """
 
     def get_origin(self):
@@ -1068,14 +1069,14 @@ class ActQuant(_QuantActivation):
     r"""
     Quantization aware training activation function.
 
-    Add the fake quant op to the end of activation op, by which the output of activation op will be truncated.
-    Please check `FakeQuantWithMinMaxObserver` or other observer for more details.
+    Adds the fake quantized operation to the end of the activation operation, by which the output of the activation
+    operation will be truncated. Please check `FakeQuantWithMinMaxObserver` or other observer for more details.
 
     Args:
         activation (Cell): Activation cell.
         ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
-        fake_before (bool): Whether add fake quant operation before activation. Default: False.
+        fake_before (bool): Whether to add the fake quantized operation before activation. Default: False.
         quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
             both set to default FakeQuantWithMinMaxObserver.
         quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -1134,9 +1135,9 @@ class ActQuant(_QuantActivation):
 
 class TensorAddQuant(Cell):
     r"""
-    Add Fake Quant OP after TensorAdd OP.
+    Adds the fake quantized operation after the TensorAdd operation.
 
-    This part is a more detailed overview of TensorAdd op.
+    This part is a more detailed overview of the TensorAdd operation.
 
     Args:
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
@@ -1185,9 +1186,9 @@ class TensorAddQuant(Cell):
 
 class MulQuant(Cell):
     r"""
-    Add Fake Quant OP after Mul OP.
+    Adds the fake quantized operation after the `Mul` operation.
 
-    This part is a more detailed overview of Mul op.
+    This part is a more detailed overview of the `Mul` operation.
 
     Args:
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py
index 2a9bc9a696..43c8d50dcd 100644
--- a/mindspore/nn/loss/loss.py
+++ b/mindspore/nn/loss/loss.py
@@ -66,7 +66,7 @@ class _Loss(Cell):
 
 class L1Loss(_Loss):
     r"""
-    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` by element,
+    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` element-wise,
     where :math:`x` is the input Tensor and :math:`y` is the target Tensor.
 
     For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
@@ -114,7 +114,7 @@ class L1Loss(_Loss):
 class MSELoss(_Loss):
     r"""
     MSELoss creates a criterion to measure the mean squared error (squared L2-norm) between :math:`x` and :math:`y`
-    by element, where :math:`x` is the input and :math:`y` is the target.
+    element-wise, where :math:`x` is the input and :math:`y` is the target.
 
     For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
     the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as:
@@ -488,7 +488,7 @@ class SampledSoftmaxLoss(_Loss):
 
 class BCELoss(_Loss):
     r"""
-    BCELoss creates a criterion to measure the Binary Cross Entropy between the true labels and predicted labels.
+    BCELoss creates a criterion to measure the binary cross entropy between the true labels and predicted labels.
 
     Note:
         Set the predicted labels as :math:`x`, true labels as :math:`y`, the output loss as :math:`\ell(x, y)`.
diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 7eb51065ab..e32990cd14 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -465,9 +465,9 @@ class AdamWeightDecay(Optimizer):
 
 class AdamOffload(Optimizer):
     r"""
-    Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. This optimizer will offload Adam optimizer to
-    host CPU and keep parameters being updated on the device, to minimize the memory cost. Although that would bring
-    about an increase of performance overhead, the optimizer could be used to run a larger model.
+    This optimizer offloads the Adam optimizer to the host CPU while keeping the parameters updated on the device,
+    to minimize the memory cost. Although this would increase the performance overhead,
+    the optimizer could be used to run a larger model.
 
     The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_.
diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py
index bf458fbb65..1b77f03321 100644
--- a/mindspore/nn/optim/proximal_ada_grad.py
+++ b/mindspore/nn/optim/proximal_ada_grad.py
@@ -51,7 +51,7 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):
 
 class ProximalAdagrad(Optimizer):
     """
-    Implement the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
+    Implements the ProximalAdagrad algorithm with the ApplyProximalAdagrad operator.
 
     ProximalAdagrad is an online Learning and Stochastic Optimization.
     Refer to paper `Efficient Learning using Forward-Backward Splitting
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index f916638b56..2075d3c74d 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -33,7 +33,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, accum, s
 
 class SGD(Optimizer):
     r"""
-    Implements stochastic gradient descent (optionally with momentum).
+    Implements stochastic gradient descent. Momentum is optional.
 
     Introduction to SGD can be found at https://en.wikipedia.org/wiki/Stochastic_gradient_descent.
     Nesterov momentum is based on the formula from paper `On the importance of initialization and
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 6112f6f5cf..42a163894f 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -4306,7 +4306,7 @@ class KLDivLoss(PrimitiveWithInfer):
 
 class BinaryCrossEntropy(PrimitiveWithInfer):
     r"""
-    Computes the Binary Cross Entropy between the target and the output.
+    Computes the binary cross entropy between the target and the output.
 
     Note:
         Sets input as :math:`x`, input label as :math:`y`, output as :math:`\ell(x, y)`.