diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py
index 3a3024628c..acf9a71190 100644
--- a/mindspore/nn/layer/math.py
+++ b/mindspore/nn/layer/math.py
@@ -284,7 +284,7 @@ class LGamma(Cell):
 
 class DiGamma(Cell):
     r"""
-    Calculate Digamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
+    Calculates Digamma using Lanczos' approximation referring to "A Precision Approximation of the Gamma Function".
 
     The algorithm is:
 
     .. math::
@@ -549,7 +549,7 @@ def _IgammacContinuedFraction(ax, x, a, enabled):
 
 class IGamma(Cell):
     r"""
-    Calculate lower regularized incomplete Gamma function.
+    Calculates the lower regularized incomplete Gamma function.
 
     The lower regularized incomplete Gamma function is defined as:
 
     .. math::
@@ -950,7 +950,7 @@ class Moments(Cell):
 
 class MatInverse(Cell):
     """
-    Calculate the inverse of Positive-Definite Hermitian matrix using Cholesky decomposition.
+    Calculates the inverse of a Positive-Definite Hermitian matrix using Cholesky decomposition.
 
     Supported Platforms:
         ``GPU``
@@ -987,7 +987,7 @@ class MatInverse(Cell):
 
 class MatDet(Cell):
     """
-    Calculate the determinant of Positive-Definite Hermitian matrix using Cholesky decomposition.
+    Calculates the determinant of a Positive-Definite Hermitian matrix using Cholesky decomposition.
 
     Supported Platforms:
         ``GPU``
diff --git a/mindspore/nn/learning_rate_schedule.py b/mindspore/nn/learning_rate_schedule.py
index 257574666d..27dec3c7e1 100644
--- a/mindspore/nn/learning_rate_schedule.py
+++ b/mindspore/nn/learning_rate_schedule.py
@@ -53,7 +53,7 @@ def _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, cls_name):
 
 class ExponentialDecayLR(LearningRateSchedule):
     r"""
-    Calculate learning rate base on exponential decay function.
+    Calculates learning rate based on exponential decay function.
 
     For the i-th step, the formula of computing decayed_learning_rate[i] is:
@@ -111,7 +111,7 @@ class ExponentialDecayLR(LearningRateSchedule):
 
 class NaturalExpDecayLR(LearningRateSchedule):
     r"""
-    Calculate learning rate base on natural exponential decay function.
+    Calculates learning rate based on natural exponential decay function.
 
     For the i-th step, the formula of computing decayed_learning_rate[i] is:
@@ -170,7 +170,7 @@ class NaturalExpDecayLR(LearningRateSchedule):
 
 class InverseDecayLR(LearningRateSchedule):
     r"""
-    Calculate learning rate base on inverse-time decay function.
+    Calculates learning rate based on inverse-time decay function.
 
     For the i-th step, the formula of computing decayed_learning_rate[i] is:
@@ -227,7 +227,7 @@ class InverseDecayLR(LearningRateSchedule):
 
 class CosineDecayLR(LearningRateSchedule):
     r"""
-    Calculate learning rate base on cosine decay function.
+    Calculates learning rate based on cosine decay function.
 
     For the i-th step, the formula of computing decayed_learning_rate[i] is:
@@ -283,7 +283,7 @@ class CosineDecayLR(LearningRateSchedule):
 
 class PolynomialDecayLR(LearningRateSchedule):
     r"""
-    Calculate learning rate base on polynomial decay function.
+    Calculates learning rate based on polynomial decay function.
 
     For the i-th step, the formula of computing decayed_learning_rate[i] is:
@@ -362,7 +362,7 @@ class PolynomialDecayLR(LearningRateSchedule):
 
 class WarmUpLR(LearningRateSchedule):
     r"""
-    Get learning rate warming up.
+    Gets learning rate warming up.
 
     For the i-th step, the formula of computing warmup_learning_rate[i] is:
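The `*DecayLR` docstrings touched above all refer to a per-step decay formula. Below is a minimal pure-Python sketch of the exponential decay rule that `ExponentialDecayLR` documents, assuming the usual form `decayed_learning_rate[i] = learning_rate * decay_rate ** (i / decay_steps)` with `is_stair` flooring the exponent; `exponential_decay_lr` is an illustrative helper, not MindSpore API.

```python
# Illustrative helper (not MindSpore API), assuming the standard exponential
# decay rule documented by ExponentialDecayLR:
#   decayed_learning_rate[i] = learning_rate * decay_rate ** (i / decay_steps)
# with is_stair=True flooring i / decay_steps so the rate decays in stairs.
def exponential_decay_lr(learning_rate, decay_rate, decay_steps, step, is_stair=False):
    p = step // decay_steps if is_stair else step / decay_steps
    return learning_rate * decay_rate ** p

# lr=0.1, decay_rate=0.9, decay_steps=4: the rate drops once every 4 steps.
print([round(exponential_decay_lr(0.1, 0.9, 4, i, is_stair=True), 6) for i in range(6)])
# [0.1, 0.1, 0.1, 0.1, 0.09, 0.09]
```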
diff --git a/mindspore/nn/metrics/__init__.py b/mindspore/nn/metrics/__init__.py
index aba31299f8..d3dad9eede 100755
--- a/mindspore/nn/metrics/__init__.py
+++ b/mindspore/nn/metrics/__init__.py
@@ -59,7 +59,7 @@ __factory__ = {
 
 def names():
     """
-    Get the names of the metric methods.
+    Gets the names of the metric methods.
 
     Returns:
         List, the name list of metric methods.
diff --git a/mindspore/nn/metrics/recall.py b/mindspore/nn/metrics/recall.py
index f3f8d89b3c..2ee6b5db84 100644
--- a/mindspore/nn/metrics/recall.py
+++ b/mindspore/nn/metrics/recall.py
@@ -23,7 +23,7 @@ from ._evaluation import EvaluationBase
 
 class Recall(EvaluationBase):
     r"""
-    Calculate recall for classification and multilabel data.
+    Calculates recall for classification and multilabel data.
 
     The recall class creates two local variables, :math:`\text{true_positive}` and :math:`\text{false_negative}`,
     that are used to compute the recall. This value is ultimately returned as the recall, an idempotent operation
diff --git a/mindspore/nn/optim/ada_grad.py b/mindspore/nn/optim/ada_grad.py
index 0bb67f4481..a779796246 100644
--- a/mindspore/nn/optim/ada_grad.py
+++ b/mindspore/nn/optim/ada_grad.py
@@ -37,7 +37,7 @@ def _check_param_value(accum, update_slots, prim_name=None):
 
 class Adagrad(Optimizer):
     """
-    Implement the Adagrad algorithm with ApplyAdagrad Operator.
+    Implements the Adagrad algorithm with ApplyAdagrad Operator.
 
     Adagrad is an online Learning and Stochastic Optimization.
     Refer to paper `Efficient Learning using Forward-Backward Splitting
diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py
index 12fd426b63..f6e86afd0c 100644
--- a/mindspore/nn/optim/ftrl.py
+++ b/mindspore/nn/optim/ftrl.py
@@ -74,7 +74,7 @@ def _check_param(initial_accum, lr_power, l1, l2, use_locking, prim_name=None):
 
 class FTRL(Optimizer):
     """
-    Implement the FTRL algorithm with ApplyFtrl Operator.
+    Implements the FTRL algorithm with ApplyFtrl Operator.
 
     FTRL is an online convex optimization algorithm that adaptively chooses its regularization function
     based on the loss functions. Refer to paper `Adaptive Bound Optimization for Online Convex Optimization
diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py
index 7f755b1392..ad976d7f24 100644
--- a/mindspore/nn/optim/lazyadam.py
+++ b/mindspore/nn/optim/lazyadam.py
@@ -104,9 +104,9 @@ def _check_param_value(beta1, beta2, eps, weight_decay, prim_name):
 
 class LazyAdam(Optimizer):
     r"""
-    Updates gradients by Adaptive Moment Estimation (Adam) algorithm.
+    This optimizer will apply a lazy Adam algorithm when the gradient is sparse.
 
-    The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_.
+    The original Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_.
 
     The updating formulas are as follows,
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index b9c4cb7cc0..6fed736f5a 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -733,7 +733,7 @@ class Unique(Primitive):
         - **x** (Tensor) - The input tensor.
 
     Outputs:
-        Tuple, containing Tensor objects `(y, idx)., `y` is a tensor with the
+        Tuple, containing Tensor objects `(y, idx)`, `y` is a tensor with the
         same type as `x`, and contains the unique elements in `x`, sorted in
         ascending order. `idx` is a tensor containing indices of elements in the
         input corresponding to the output tensor.
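The `Unique` output description fixed above is easiest to see on a small input. The sketch below uses NumPy's `np.unique(..., return_inverse=True)` as a stand-in for the documented `(y, idx)` pair; it illustrates the described semantics, not the operator's implementation.

```python
# Illustration of the documented (y, idx) semantics of Unique, using NumPy's
# np.unique with return_inverse=True: y holds the sorted unique values and
# idx maps every input element to its position in y.
import numpy as np

x = np.array([1, 2, 5, 2], dtype=np.int32)
y, idx = np.unique(x, return_inverse=True)
print(y)    # [1 2 5]
print(idx)  # [0 1 2 1]
```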
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 3b69b9e972..6548ebc104 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -2961,7 +2961,9 @@ class IsNan(PrimitiveWithInfer):
     Examples:
         >>> is_nan = ops.IsNan()
         >>> input_x = Tensor(np.array([np.log(-1), 1, np.log(0)]), mindspore.float32)
-        >>> result = is_nan(input_x)
+        >>> output = is_nan(input_x)
+        >>> print(output)
+        [True False False]
     """
 
     @prim_attr_register
@@ -2992,7 +2994,9 @@ class IsInf(PrimitiveWithInfer):
     Examples:
         >>> is_inf = ops.IsInf()
         >>> input_x = Tensor(np.array([np.log(-1), 1, np.log(0)]), mindspore.float32)
-        >>> result = is_inf(input_x)
+        >>> output = is_inf(input_x)
+        >>> print(output)
+        [False False True]
     """
 
     @prim_attr_register
@@ -3132,9 +3136,9 @@ class NPUGetFloatStatus(PrimitiveWithInfer):
         >>> alloc_status = ops.NPUAllocFloatStatus()
         >>> get_status = ops.NPUGetFloatStatus()
         >>> init = alloc_status()
-        >>> output = get_status(init)
-        >>> print(output)
-        [0. 0. 0. 0. 0. 0. 0. 0.]
+        >>> get_status(init)
+        >>> print(init)
+        [1. 1. 1. 1. 1. 1. 1. 1.]
     """
 
     @prim_attr_register
@@ -3179,9 +3183,9 @@ class NPUClearFloatStatus(PrimitiveWithInfer):
         >>> clear_status = ops.NPUClearFloatStatus()
         >>> init = alloc_status()
         >>> flag = get_status(init)
-        >>> output = clear_status(init)
-        >>> print(output)
-        [0. 0. 0. 0. 0. 0. 0. 0.]
+        >>> clear_status(init)
+        >>> print(init)
+        [1. 1. 1. 1. 1. 1. 1. 1.]
     """
 
     @prim_attr_register
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 820dd7beed..f27cd53f29 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -1512,7 +1512,14 @@ class MaxPool(_Pool):
     Examples:
         >>> input_tensor = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
        >>> maxpool_op = ops.MaxPool(padding="VALID", ksize=2, strides=1)
-        >>> output_tensor = maxpool_op(input_tensor)
+        >>> output = maxpool_op(input_tensor)
+        >>> print(output)
+        [[[[ 5. 6. 7.]
+        [ 9. 10. 11.]]
+        [[17. 18. 19.]
+        [21. 22. 23.]]
+        [[29. 30. 31.]
+        [33. 34. 35.]]]]
     """
 
     @prim_attr_register
@@ -1568,6 +1575,13 @@ class MaxPoolWithArgmax(_Pool):
         >>> input_tensor = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
         >>> maxpool_arg_op = ops.MaxPoolWithArgmax(padding="VALID", ksize=2, strides=1)
         >>> output_tensor, argmax = maxpool_arg_op(input_tensor)
+        >>> print(output_tensor)
+        [[[[ 5. 6. 7.]
+        [ 9. 10. 11.]]
+        [[17. 18. 19.]
+        [21. 22. 23.]]
+        [[29. 30. 31.]
+        [33. 34. 35.]]]]
     """
 
     def __init__(self, ksize=1, strides=1, padding="valid", data_format="NCHW"):
@@ -2315,7 +2329,9 @@ class SGD(PrimitiveWithCheck):
         >>> accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mindspore.float32)
         >>> momentum = Tensor(0.1, mindspore.float32)
         >>> stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mindspore.float32)
-        >>> result = sgd(parameters, gradient, learning_rate, accum, momentum, stat)
+        >>> output = sgd(parameters, gradient, learning_rate, accum, momentum, stat)
+        >>> print(output[0])
+        [ 1.9899 -0.4903 1.6952001 3.9801 ]
     """
 
     @prim_attr_register
@@ -2931,7 +2947,7 @@ class FastGelu(PrimitiveWithInfer):
     FastGelu is defined as follows:
 
     .. math::
-        \text{output} = \frac {x} {1 + \exp(-1.702 * \left| x \right|)} * \exp(0.851 * (x - \left| x \right|))`,
+        \text{output} = \frac {x} {1 + \exp(-1.702 * \left| x \right|)} * \exp(0.851 * (x - \left| x \right|)),
 
     where :math:`x` is the element of the input.
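The corrected FastGelu formula can be checked numerically with a direct NumPy transcription of that expression; `fast_gelu` below is an illustrative helper, not the MindSpore operator.

```python
# The corrected FastGelu expression, written out with NumPy. For x >= 0 the
# second factor is exp(0) = 1, so it reduces to x * sigmoid(1.702 * x).
# fast_gelu is an illustrative helper, not the MindSpore operator itself.
import numpy as np

def fast_gelu(x):
    x = np.asarray(x, dtype=np.float32)
    return x / (1.0 + np.exp(-1.702 * np.abs(x))) * np.exp(0.851 * (x - np.abs(x)))

print(fast_gelu([-1.0, 0.0, 1.0]))  # approximately [-0.154  0.  0.846]
```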
@@ -3466,8 +3482,8 @@ class ROIAlign(PrimitiveWithInfer):
     points. The details of (RoI) Align operator are described in `Mask R-CNN `_.
 
     Args:
-        pooled_height (int): The output features' height.
-        pooled_width (int): The output features' width.
+        pooled_height (int): The height of the output features.
+        pooled_width (int): The width of the output features.
         spatial_scale (float): A scaling factor that maps the raw image coordinates to the input
             feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h`
             in the input feature map, the `spatial_scale` must be `fea_h / ori_h`.
@@ -4046,7 +4062,7 @@ class FusedSparseFtrl(PrimitiveWithInfer):
         - **linear** (Tensor) - A Tensor with shape (1,).
 
     Supported Platforms:
-        ``CPU``
+        ``Ascend`` ``CPU``
 
     Examples:
         >>> import mindspore
@@ -4072,6 +4088,9 @@ class FusedSparseFtrl(PrimitiveWithInfer):
         >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
         >>> output = net(grad, indices)
         >>> print(output)
+        (Tensor(shape=[1], dtype=Float32, value= [0.00000000e+00]),
+        Tensor(shape=[1], dtype=Float32, value= [0.00000000e+00]),
+        Tensor(shape=[1], dtype=Float32, value= [0.00000000e+00]))
     """
     __mindspore_signature__ = (
         sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
@@ -4155,6 +4174,7 @@ class FusedSparseProximalAdagrad(PrimitiveWithInfer):
     Examples:
         >>> import numpy as np
         >>> import mindspore.nn as nn
+        >>> import mindspore.common.dtype as mstype
         >>> from mindspore import Tensor, Parameter
         >>> from mindspore.ops import operations as ops
         >>> class Net(nn.Cell):
@@ -4176,6 +4196,8 @@ class FusedSparseProximalAdagrad(PrimitiveWithInfer):
         >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
         >>> output = net(grad, indices)
         >>> print(output)
+        (Tensor(shape=[1], dtype=Float32, value= [0.00000000e+00]),
+        Tensor(shape=[1], dtype=Float32, value= [0.00000000e+00]))
     """
     __mindspore_signature__ = (
         sig.make_sig('var', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
@@ -4225,9 +4247,9 @@ class KLDivLoss(PrimitiveWithInfer):
     .. math::
         \ell(x, y) = \begin{cases}
-            L, & \text{if reduction} = \text{`none';}\\
-            \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\
-            \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.}
+            L, & \text{if reduction} = \text{'none';}\\
+            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
+            \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
         \end{cases}
 
     Args:
@@ -4302,9 +4324,9 @@ class BinaryCrossEntropy(PrimitiveWithInfer):
     .. math::
         \ell(x, y) = \begin{cases}
-            L, & \text{if reduction} = \text{`none';}\\
-            \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\
-            \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.}
+            L, & \text{if reduction} = \text{'none';}\\
+            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
+            \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
         \end{cases}
 
     Args:
@@ -6102,10 +6124,13 @@ class CTCLoss(PrimitiveWithInfer):
         >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
         >>> ctc_loss = ops.CTCLoss()
         >>> loss, gradient = ctc_loss(inputs, labels_indices, labels_values, sequence_length)
-        >>> print(loss.shape)
-        (2,)
-        >>> print(gradient.shape)
-        (2, 2, 3)
+        >>> print(loss)
+        [ 0.69121575 0.5381993 ]
+        >>> print(gradient)
+        [[[ 0.25831494 0.3623634 -0.62067937 ]
+        [ 0.25187883 0.2921483 -0.5440271 ]]
+        [[ 0.43522435 0.24408469 0.07787037 ]
+        [ 0.29642645 0.4232373 0.06138104 ]]]
     """
 
     @prim_attr_register
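KLDivLoss and BinaryCrossEntropy above share the same `reduction` cases. The sketch below applies just that reduction step to an already-computed element-wise loss `L` (the definition of `L` itself lies outside these hunks); `reduce_loss` is an illustrative helper, not MindSpore API.

```python
# The reduction cases shared by KLDivLoss and BinaryCrossEntropy, applied to an
# already-computed element-wise loss L. reduce_loss is an illustrative helper,
# not MindSpore API; how L is computed is outside the hunks above.
import numpy as np

def reduce_loss(L, reduction="mean"):
    if reduction == "none":
        return L            # keep the element-wise loss as-is
    if reduction == "mean":
        return L.mean()     # average over all elements
    if reduction == "sum":
        return L.sum()      # sum over all elements
    raise ValueError("reduction must be 'none', 'mean' or 'sum'")

L = np.array([0.2, 0.4, 0.6], dtype=np.float32)
print(reduce_loss(L, "none"), reduce_loss(L, "mean"), reduce_loss(L, "sum"))
# [0.2 0.4 0.6] 0.4 1.2  (up to float32 rounding)
```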