From: @lihongkang1
Reviewed-by: @liangchenghui, @wuxuejian
Signed-off-by: @liangchenghui
Tag: tags/v1.2.0-rc1
@@ -66,7 +66,7 @@ class QuantDtype(enum.Enum):
 @staticmethod
 def switch_signed(dtype):
 """
-Swicth the signed state of the input quant datatype.
+Switch the signed state of the input quant datatype.
 Args:
 dtype (QuantDtype): quant datatype.
@@ -43,10 +43,10 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
 symmetric=(False, False),
 narrow_range=(False, False)):
 r"""
-Configs the oberser type of weights and data flow with quant params.
+Configs the observer type of weights and data flow with quant params.
 Args:
-quant_observer (Observer, list or tuple): The oberser type to do quantization. The first element represent
+quant_observer (Observer, list or tuple): The observer type to do quantization. The first element represent
 weights and second element represent data flow.
 Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver)
 quant_delay (int, list or tuple): Number of steps after which weights and activations are quantized during
@@ -64,7 +64,7 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
 The first element represents weights and the second element represents data flow. Default: (False, False)
 Returns:
-QuantConfig, Contains the oberser type of weight and activation.
+QuantConfig, Contains the observer type of weight and activation.
 """
 weight_observer = quant_observer[0].partial_init(quant_delay=quant_delay[0], quant_dtype=quant_dtype[0],
 per_channel=per_channel[0], symmetric=symmetric[0],
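
For reference, a minimal usage sketch of the API this hunk documents, assuming the defaults shown in the signature above (the first tuple element always configures weights, the second configures data flow; the delay value of 900 is illustrative only):

    import mindspore.nn as nn
    from mindspore.compression.quant import create_quant_config

    # Weight settings come first in each pair, activation (data flow) second.
    qconfig = create_quant_config(
        quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver),
        quant_delay=(900, 900),     # start fake-quant after 900 training steps
        per_channel=(True, False),  # per-channel quantization for weights only
        symmetric=(True, False),
        narrow_range=(False, False))
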
@@ -273,7 +273,7 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param
 Args:
 quant_model: quantization model.
 params_dict: parameter dict that stores fp32 parameters.
-quant_new_params: parameters that exist in quantative network but not in unquantative network.
+quant_new_params: parameters that exist in quantitative network but not in unquantitative network.
 Returns:
 None
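
A usage sketch, assuming the function is exported from `mindspore.compression.quant` as the hunk header suggests; `quant_net` and the checkpoint path are placeholders for a real quantization-aware network and an fp32 checkpoint:

    from mindspore import load_checkpoint
    from mindspore.compression.quant import load_nonquant_param_into_quant_net

    params_dict = load_checkpoint("fp32_model.ckpt")  # fp32 parameters
    load_nonquant_param_into_quant_net(quant_net, params_dict)
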
@@ -452,7 +452,7 @@ def reset_auto_parallel_context():
 def _check_target_specific_cfgs(device, arg_key):
-"""Checking whether a config is sutable for a specified device"""
+"""Checking whether a config is suitable for a specified device"""
 device_cfgs = {
 'enable_auto_mixed_precision': ['Ascend'],
 'enable_dump': ['Ascend'],
@@ -545,7 +545,7 @@ def set_context(**kwargs):
 - op_trace: collect single operator performance data.
 The profiling can choose the combination of `training_trace`, `task_trace`,
-`training_trace` and `task_trace` combination, and eparated by colons;
+`training_trace` and `task_trace` combination, and separated by colons;
 a single operator can choose `op_trace`, `op_trace` cannot be combined with
 `training_trace` and `task_trace`. Default: "training_trace".
 check_bprop (bool): Whether to check bprop. Default: False.
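
As an illustration of the colon-separated combination described above — a sketch against the 1.x context API; the `enable_profiling` flag is an assumption not shown in this hunk:

    from mindspore import context

    # Collect both iteration trace and task trace; options are colon-separated.
    context.set_context(enable_profiling=True,
                        profiling_options="training_trace:task_trace")
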
@@ -553,7 +553,7 @@ class Pad(Cell):
 - If `mode` is "CONSTANT", it fills the edge with 0, regardless of the values of the `input_x`.
 If the `input_x` is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the
 Outputs is [[0,0,0,0,0,0,0], [0,0,1,2,3,0,0], [0,0,4,5,6,0,0], [0,0,7,8,9,0,0], [0,0,0,0,0,0,0]].
-- If `mode` is "REFLECT", it uses a way of symmetrical copying throught the axis of symmetry to fill in.
+- If `mode` is "REFLECT", it uses a way of symmetrical copying through the axis of symmetry to fill in.
 If the `input_x` is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the
 Outputs is [[6,5,4,5,6,5,4], [3,2,1,2,3,2,1], [6,5,4,5,6,5,4], [9,8,7,8,9,8,7], [6,5,4,5,6,5,4]].
 - If `mode` is "SYMMETRIC", the filling method is similar to the "REFLECT". It is also copied
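
The worked "CONSTANT" values in the docstring above can be reproduced with a short script (a sketch assuming the standard `nn.Pad` interface):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32))
    pad = nn.Pad(paddings=((1, 1), (2, 2)), mode="CONSTANT")
    print(pad(x))  # zero-filled border: 1 row top/bottom, 2 columns left/right
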
@@ -99,7 +99,7 @@ class LSTM(Cell):
 Data type of `hx` must be the same as `input`.
 Outputs:
-Tuple, a tuple constains (`output`, (`h_n`, `c_n`)).
+Tuple, a tuple contains (`output`, (`h_n`, `c_n`)).
 - **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
 - **hx_n** (tuple) - A tuple of two Tensor (h_n, c_n) both of shape
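
A shape-checking sketch of the tuple output documented above, assuming a unidirectional single-layer cell with the default `batch_first=False`:

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    net = nn.LSTM(input_size=10, hidden_size=16, num_layers=1)
    x = Tensor(np.ones([5, 3, 10], np.float32))    # (seq_len, batch_size, input_size)
    h0 = Tensor(np.zeros([1, 3, 16], np.float32))  # (num_layers * num_directions, batch_size, hidden_size)
    c0 = Tensor(np.zeros([1, 3, 16], np.float32))
    output, (h_n, c_n) = net(x, (h0, c0))          # output shape: (5, 3, 16)
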
@@ -162,7 +162,7 @@ class Range(Cell):
 class LGamma(Cell):
 r"""
-Calculates LGamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
+Calculates LGamma using Lanczos' approximation referring to "A Precision Approximation of the Gamma Function".
 The algorithm is:
 .. math::
@@ -291,7 +291,7 @@ class LGamma(Cell):
 class DiGamma(Cell):
 r"""
-Calculates Digamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
+Calculates Digamma using Lanczos' approximation referring to "A Precision Approximation of the Gamma Function".
 The algorithm is:
 .. math::
@@ -235,7 +235,7 @@ class BatchNorm1d(_BatchNorm):
 Note:
 The implementation of BatchNorm is different in graph mode and pynative mode, therefore the mode is not
-recommended to be changed after net was initilized.
+recommended to be changed after net was initialized.
 Args:
 num_features (int): `C` from an expected input of size (N, C).
@@ -322,7 +322,7 @@ class BatchNorm2d(_BatchNorm):
 Note:
 The implementation of BatchNorm is different in graph mode and pynative mode, therefore that mode can not be
-changed after net was initilized.
+changed after net was initialized.
 Note that the formula for updating the running_mean and running_var is
 :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times x_t + \text{momentum} \times \hat{x}`,
 where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the new observed value.
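
A minimal sketch of the construct-then-run order the notes above call for (create the cell once, before any mode change):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    bn = nn.BatchNorm2d(num_features=3, momentum=0.9)  # C = 3 for (N, C, H, W) inputs
    x = Tensor(np.ones([1, 3, 2, 2], np.float32))
    y = bn(x)  # running statistics follow the momentum formula above
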
@@ -111,7 +111,7 @@ def _partial_init(cls_or_self, **kwargs):
 This can be useful when there is a need to create classes with the same
 constructor arguments, but different instances.
-Example::
+Examples:
 >>> Foo.partial_init = classmethod(_partial_init)
 >>> foo_builder = Foo.partial_init(a=3, b=4).partial_init(answer=42)
 >>> foo_instance1 = foo_builder()
@@ -365,7 +365,7 @@ class Conv2dBnFoldQuantOneConv(Cell):
 var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
 variance vector. Default: 'ones'.
 fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -565,7 +565,7 @@ class Conv2dBnFoldQuant(Cell):
 var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
 variance vector. Default: 'ones'.
 fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -743,7 +743,7 @@ class Conv2dBnWithoutFoldQuant(Cell):
 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
 Default: 'normal'.
 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -856,7 +856,7 @@ class Conv2dQuant(Cell):
 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
 Default: 'normal'.
 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -962,7 +962,7 @@ class DenseQuant(Cell):
 has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
 activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer,
 eg. 'relu'. Default: None.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -1073,7 +1073,7 @@ class ActQuant(_QuantActivation):
 ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
 ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
 fake_before (bool): Whether add fake quantized operation before activation. Default: False.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -1138,7 +1138,7 @@ class TensorAddQuant(Cell):
 Args:
 ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -1190,7 +1190,7 @@ class MulQuant(Cell):
 Args:
 ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
-quant_config (QuantConfig): Configs the oberser types and quant settings of weight and activation. Can be
+quant_config (QuantConfig): Configs the observer types and quant settings of weight and activation. Can be
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
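
Since every quant cell in the hunks above takes the same `quant_config`, one hedged sketch covers them all (assuming `Conv2dQuant` is exported under `mindspore.nn` as in MindSpore 1.x):

    import mindspore.nn as nn
    from mindspore.compression.quant import create_quant_config

    qconfig = create_quant_config()  # default FakeQuantWithMinMaxObserver for both
    conv = nn.Conv2dQuant(in_channels=1, out_channels=6, kernel_size=5,
                          quant_config=qconfig)
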
@@ -59,7 +59,7 @@ class Metric(metaclass=ABCMeta):
 data (numpy.array): Input data.
 Returns:
-bool, return trun, if input data are one-hot encoding.
+bool, return true, if input data are one-hot encoding.
 """
 if data.ndim > 1 and np.equal(data ** 2, data).all():
 shp = (data.shape[0],) + data.shape[2:]
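
The `np.equal(data ** 2, data)` test in this hunk works because one-hot arrays contain only 0s and 1s, the two fixed points of squaring:

    import numpy as np

    onehot = np.array([[0, 1, 0], [1, 0, 0]])
    print(np.equal(onehot ** 2, onehot).all())  # True: 0**2 == 0 and 1**2 == 1
    dense = np.array([[0.2, 0.5, 0.3]])
    print(np.equal(dense ** 2, dense).all())    # False: 0.2**2 != 0.2
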
@@ -260,7 +260,7 @@ class Optimizer(Cell):
 return gradients
 def _grad_sparse_indices_deduplicate(self, gradients):
-""" In the case of using big operators, de duplicate the 'indexes' in gradients."""
+""" In the case of using big operators, deduplicate the 'indexes' in gradients."""
 if self._target != 'CPU' and self._unique:
 gradients = self.map_(F.partial(_indices_deduplicate), gradients)
 return gradients
@@ -78,7 +78,7 @@ class RMSProp(Optimizer):
 :math:`m_{t}` is moment, the delta of `w`, :math:`m_{t-1}` is the last moment of :math:`m_{t}`.
 :math:`\\rho` represents `decay`. :math:`\\beta` is the momentum term, represents `momentum`.
 :math:`\\epsilon` is a smoothing term to avoid division by zero, represents `epsilon`.
-:math:`\\eta` is learning rate, represents `learning_rate`. :math:`\\nabla Q_{i}(w)` is gradientse,
+:math:`\\eta` is learning rate, represents `learning_rate`. :math:`\\nabla Q_{i}(w)` is gradients,
 represents `gradients`.
 Note:
@@ -253,7 +253,7 @@ class Bijector(Cell):
 If args[0] is a distribution instance, the call will generate a new distribution derived from
 the input distribution.
 Otherwise, input[0] must be the name of a Bijector function, e.g. "forward", then this call will
-go in the construct and invoke the correstpoding Bijector function.
+go in the construct and invoke the corresponding Bijector function.
 Args:
 *args: args[0] shall be either a distribution or the name of a Bijector function.
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""Utitly functions to help distribution class."""
+"""Utility functions to help distribution class."""
 import numpy as np
 from mindspore.ops import operations as P
 from mindspore.common import dtype as mstype
 def exp_generic(input_x):
 """
-Log op on Ascend doesn't supprot int types.
+Log op on Ascend doesn't support int types.
 Fix this with casting the type.
 """
 exp = P.Exp()
@@ -36,7 +36,7 @@ def log_generic(input_x):
 """
 Log op on Ascend is calculated as log(abs(x)).
 Fix this with putting negative values as nan.
-And log op on Ascend doesn't supprot int types.
+And log op on Ascend doesn't support int types.
 Fix this with casting the type.
 """
 log = P.Log()
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""Utitly functions to help distribution class."""
+"""Utility functions to help distribution class."""
 import numpy as np
 from mindspore import context
 from mindspore._checkparam import Validator as validator
@@ -52,7 +52,7 @@ class Beta(Distribution):
 >>> from mindspore import Tensor
 >>> # To initialize a Beta distribution of the concentration1 3.0 and the concentration0 4.0.
 >>> b1 = msd.Beta([3.0], [4.0], dtype=mindspore.float32)
->>> # A Beta distribution can be initilized without arguments.
+>>> # A Beta distribution can be initialized without arguments.
 >>> # In this case, `concentration1` and `concentration0` must be passed in through arguments.
 >>> b2 = msd.Beta(dtype=mindspore.float32)
 >>> # Here are some tensors used below for testing
@@ -52,7 +52,7 @@ class Gamma(Distribution):
 >>> from mindspore import Tensor
 >>> # To initialize a Gamma distribution of the concentration 3.0 and the rate 4.0.
 >>> g1 = msd.Gamma([3.0], [4.0], dtype=mindspore.float32)
->>> # A Gamma distribution can be initilized without arguments.
+>>> # A Gamma distribution can be initialized without arguments.
 >>> # In this case, `concentration` and `rate` must be passed in through arguments.
 >>> g2 = msd.Gamma(dtype=mindspore.float32)
 >>> # Here are some tensors used below for testing
@@ -26,8 +26,9 @@ from ._utils.custom_ops import exp_generic, log_generic
 class Geometric(Distribution):
 """
 Geometric Distribution.
-It represents that there are k failures before the first sucess, namely taht there are in total k+1 Bernoulli trials
-when the first success is achieved.
+It represents that there are k failures before the first success, namely that there are in total k+1 Bernoulli
+trials when the first success is achieved.
 Args:
 probs (float, list, numpy.ndarray, Tensor): The probability of success.
@@ -235,7 +235,7 @@ class LogNormal(msd.TransformedDistribution):
 def _var(self, loc=None, scale=None):
 """
-The varience of the distribution.
+The variance of the distribution.
 """
 mean, sd = self._check_param_type(loc, scale)
 var = self.distribution("var", mean=mean, sd=sd)
@@ -48,7 +48,7 @@ class Poisson(Distribution):
 >>> from mindspore import Tensor
 >>> # To initialize an Poisson distribution of the rate 0.5.
 >>> p1 = msd.Poisson([0.5], dtype=mindspore.float32)
->>> # An Poisson distribution can be initilized without arguments.
+>>> # An Poisson distribution can be initialized without arguments.
 >>> # In this case, `rate` must be passed in through `args` during function calls.
 >>> p2 = msd.Poisson(dtype=mindspore.float32)
 >>>
@@ -33,7 +33,7 @@ class TransformedDistribution(Distribution):
 bijector (Bijector): The transformation to perform.
 distribution (Distribution): The original distribution. Must has a float dtype.
 seed (int): The seed is used in sampling. The global seed is used if it is None. Default:None.
-If this seed is given when a TransformedDistribution object is initialised, the object's sampling function
+If this seed is given when a TransformedDistribution object is initialized, the object's sampling function
 will use this seed; elsewise, the underlying distribution's seed will be used.
 name (str): The name of the transformed distribution. Default: 'transformed_distribution'.
@@ -240,7 +240,7 @@ class Uniform(Distribution):
 def _cross_entropy(self, dist, low_b, high_b, low=None, high=None):
 """
-Evaluate cross entropy between Uniform distributoins.
+Evaluate cross entropy between Uniform distributions.
 Args:
 dist (str): The type of the distributions. Should be "Uniform" in this case.
@@ -33,7 +33,7 @@ def _div_scalar(x, y):
 Args:
 x (Number): x
-y (NUmber): y
+y (Number): y
 Returns:
 Number, equal to x / y, the type is same as x.
@@ -48,7 +48,7 @@ def _equal_scalar(x, y):
 Args:
 x (Number): first input number.
-y (NUmber): second input number.
+y (Number): second input number.
 Returns:
 bool, if x == y return true, x != y return false.
@@ -194,7 +194,7 @@ def _tensor_getitem_by_slice(data, slice_index):
 @getitem.register("Tensor", "Tensor")
 def _tensor_getitem_by_tensor(data, tensor_index):
 """
-Getting item of tensor by tensor indice.
+Getting item of tensor by tensor indices.
 Inputs:
 data (Tensor): A tensor.
@@ -62,7 +62,7 @@ def _scalar_mul_tensor(x, y):
 @mul.register("Tensor", "Number")
 def _tensor_mul_scalar(x, y):
 """
-Returns x * y where x is a tensor and y is a scalar. x and y hava same dtype.
+Returns x * y where x is a tensor and y is a scalar. x and y have same dtype.
 Outputs:
 Tensor, has the same dtype as x.
@@ -33,7 +33,7 @@ def _not_equal_scalar(x, y):
 Args:
 x (Number): x
-y (NUmber): y
+y (Number): y
 Returns:
 bool, if x != y return true, x == y return false.
@@ -123,7 +123,7 @@ def _none_not_equal_scalar(x, y):
 Args:
 x: None.
-y: NUmber.
+y: Number.
 Returns:
 bool, return True.
@@ -28,7 +28,7 @@ def _list_setitem_with_string(data, number_index, value):
 Assigns value to list.
 Inputs:
-data (list): Data of type lis.
+data (list): Data of type list.
 number_index (Number): Index of data.
 Outputs:
@@ -43,7 +43,7 @@ def _list_setitem_with_number(data, number_index, value):
 Assigns value to list.
 Inputs:
-data (list): Data of type lis.
+data (list): Data of type list.
 number_index (Number): Index of data.
 value (Number): Value given.
@@ -59,7 +59,7 @@ def _list_setitem_with_Tensor(data, number_index, value):
 Assigns value to list.
 Inputs:
-data (list): Data of type lis.
+data (list): Data of type list.
 number_index (Number): Index of data.
 value (Tensor): Value given.
@@ -75,7 +75,7 @@ def _list_setitem_with_List(data, number_index, value):
 Assigns value to list.
 Inputs:
-data (list): Data of type lis.
+data (list): Data of type list.
 number_index (Number): Index of data.
 value (list): Value given.
@@ -91,7 +91,7 @@ def _list_setitem_with_Tuple(data, number_index, value):
 Assigns value to list.
 Inputs:
-data (list): Data of type lis.
+data (list): Data of type list.
 number_index (Number): Index of data.
 value (list): Value given.
@@ -81,7 +81,7 @@ def laplace(shape, mean, lambda_param, seed=None):
 shape (tuple): The shape of random tensor to be generated.
 mean (Tensor): The mean μ distribution parameter, which specifies the location of the peak.
 With float32 data type.
-lambda_param (Tensor): The parameter used for controling the variance of this random distribution. The
+lambda_param (Tensor): The parameter used for controlling the variance of this random distribution. The
 variance of Laplace distribution is equal to twice the square of lambda_param. With float32 data type.
 seed (int): Seed is used as entropy source for Random number engines generating pseudo-random numbers.
 Default: None, which will be treated as 0.
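
A sampling sketch of the `laplace` signature in the hunk header; the import path `mindspore.ops.composite` is an assumption based on where the 1.x random ops live:

    import mindspore.common.dtype as mstype
    from mindspore import Tensor
    from mindspore.ops import composite as C  # assumed home of laplace

    mean = Tensor(1.0, mstype.float32)
    lambda_param = Tensor(1.0, mstype.float32)  # variance = 2 * lambda_param**2
    samples = C.laplace((2, 3), mean, lambda_param, seed=5)
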
@@ -141,7 +141,6 @@ class ExpandDims(PrimitiveWithInfer):
 Inputs:
 - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
-The data type should be one of the following types: int32, float16, float32.
 - **axis** (int) - Specifies the dimension index at which to expand
 the shape of `input_x`. The value of axis must be in the range
 `[-input_x.ndim-1, input_x.ndim]`. Only constant value is allowed.
@@ -939,7 +938,7 @@ class Split(PrimitiveWithCheck):
 Args:
 axis (int): Index of the split position. Default: 0.
-output_num (int): The number of output tensors. Must be postive int. Default: 1.
+output_num (int): The number of output tensors. Must be positive int. Default: 1.
 Raises:
 ValueError: If `axis` is out of the range [-len(`input_x.shape`), len(`input_x.shape`)),
@@ -1466,7 +1465,7 @@ class InvertPermutation(PrimitiveWithInfer):
 - **input_x** (Union(tuple[int], list[int]) - The input is constructed by multiple
 integers, i.e., :math:`(y_1, y_2, ..., y_S)` representing the indices.
 The values must include 0. There can be no duplicate values or negative values.
-Only constant value is allowed. The maximum value msut be equal to length of input_x.
+Only constant value is allowed. The maximum value must be equal to length of input_x.
 Outputs:
 tuple[int]. It has the same length as the input.
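
A worked call for the InvertPermutation hunk: for input `(3, 4, 0, 2, 1)` the output `out` satisfies `out[input[i]] = i`:

    import mindspore.ops as ops

    invert = ops.InvertPermutation()
    print(invert((3, 4, 0, 2, 1)))  # (2, 4, 3, 0, 1)
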
@@ -1927,7 +1926,7 @@ class UnsortedSegmentMin(PrimitiveWithCheck):
 The data type must be float16, float32 or int32.
 - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value must be >= 0.
 The data type must be int32.
-- **num_segments** (int) - The value spcifies the number of distinct `segment_ids`.
+- **num_segments** (int) - The value specifies the number of distinct `segment_ids`.
 Note:
 If the segment_id i is absent in the segment_ids, then output[i] will be filled with
@@ -1983,7 +1982,7 @@ class UnsortedSegmentMax(PrimitiveWithCheck):
 The data type must be float16, float32 or int32.
 - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value must be >= 0.
 The data type must be int32.
-- **num_segments** (int) - The value spcifies the number of distinct `segment_ids`.
+- **num_segments** (int) - The value specifies the number of distinct `segment_ids`.
 Note:
 If the segment_id i is absent in the segment_ids, then output[i] will be filled with
@@ -2040,7 +2039,7 @@ class UnsortedSegmentProd(PrimitiveWithInfer):
 With float16, float32 or int32 data type.
 - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value must be >= 0.
 Data type must be int32.
-- **num_segments** (int) - The value spcifies the number of distinct `segment_ids`,
+- **num_segments** (int) - The value specifies the number of distinct `segment_ids`,
 must be greater than 0.
 Outputs:
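
A worked sketch of the segment semantics shared by these three hunks, using UnsortedSegmentProd (rows with the same segment id are reduced together):

    import numpy as np
    import mindspore.ops as ops
    from mindspore import Tensor

    x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [4, 2, 1]], np.float32))
    segment_ids = Tensor(np.array([0, 1, 0], np.int32))
    print(ops.UnsortedSegmentProd()(x, segment_ids, 2))
    # [[4. 4. 3.]   <- rows 0 and 2 multiplied elementwise
    #  [4. 5. 6.]]
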
@@ -2505,7 +2504,7 @@ class Select(PrimitiveWithInfer):
 If neither is None, :math:`x` and :math:`y` must have the same shape. If :math:`x` and :math:`y` are
 scalars, the conditional tensor must be a scalar. If :math:`x` and :math:`y` are
-higher-demensional vectors, the `condition` must be a vector whose size matches the
+higher-dimensional vectors, the `condition` must be a vector whose size matches the
 first dimension of :math:`x`, or must have the same shape as :math:`y`.
 The conditional tensor acts as an optional compensation (mask), which
@@ -2513,7 +2512,7 @@ class Select(PrimitiveWithInfer):
 selected from :math:`x` (if true) or :math:`y` (if false) based on the value of each
 element.
-If condition is a vector, then :math:`x` and :math:`y` are higher-demensional matrices, then it
+If condition is a vector, then :math:`x` and :math:`y` are higher-dimensional matrices, then it
 chooses to copy that row (external dimensions) from :math:`x` and :math:`y`. If condition has
 the same shape as :math:`x` and :math:`y`, you can choose to copy these elements from :math:`x`
 and :math:`y`.
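
A minimal same-shape case of the selection rule described above:

    import numpy as np
    import mindspore.ops as ops
    from mindspore import Tensor

    select = ops.Select()
    cond = Tensor(np.array([True, False]))
    x = Tensor(np.array([2.0, 3.0], np.float32))
    y = Tensor(np.array([1.0, 2.0], np.float32))
    print(select(cond, x, y))  # [2. 2.]: x where cond is True, y elsewhere
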
@@ -2629,7 +2628,7 @@ class StridedSlice(PrimitiveWithInfer):
 Given an input tensor, this operation inserts a dimension of length 1 at the dimension.
 This operation extracts a fragment of size (end-begin)/stride from the given 'input_tensor'.
-Starting from the begining position, the fragment continues adding stride to the index until
+Starting from the beginning position, the fragment continues adding stride to the index until
 all dimensions are not less than the ending position.
 Note:
@@ -3906,7 +3905,7 @@ class SpaceToBatchND(PrimitiveWithInfer):
 Args:
 block_shape (Union[list(int), tuple(int)]): The block shape of dividing block with all value greater than 1.
-The length of `block_shape` is M correspoding to the number of spatial dimensions. M must be 2.
+The length of `block_shape` is M corresponding to the number of spatial dimensions. M must be 2.
 paddings (Union[tuple, list]): The padding values for H and W dimension, containing 2 subtraction list.
 Each contains 2 integer value. All values must be greater than 0.
 `paddings[i]` specifies the paddings for the spatial dimension i,
@@ -4005,7 +4004,7 @@ class BatchToSpaceND(PrimitiveWithInfer):
 Args:
 block_shape (Union[list(int), tuple(int)]): The block shape of dividing block with all value >= 1.
-The length of block_shape is M correspoding to the number of spatial dimensions. M must be 2.
+The length of block_shape is M corresponding to the number of spatial dimensions. M must be 2.
 crops (Union[list(int), tuple(int)]): The crop value for H and W dimension, containing 2 subtraction list,
 each containing 2 int value.
 All values must be >= 0. crops[i] specifies the crop values for spatial dimension i, which corresponds to
@@ -4404,7 +4403,7 @@ class ReverseSequence(PrimitiveWithInfer):
 class EditDistance(PrimitiveWithInfer):
 """
-Computes the Levebshtein Edit Distance. It is used to measure the similarity of two sequences. The inputs are
+Computes the Levenshtein Edit Distance. It is used to measure the similarity of two sequences. The inputs are
 variable-length sequences provided by SparseTensors (hypothesis_indices, hypothesis_values, hypothesis_shape)
 and (truth_indices, truth_values, truth_shape).
@@ -1309,7 +1309,7 @@ class SquaredDifference(_MathBinaryOp):
 - **input_x** (Union[Tensor, Number, bool]) - The first input is a number, or a bool,
 or a tensor whose data type is float16, float32, int32 or bool.
 - **input_y** (Union[Tensor, Number, bool]) - The second input is a number, or a bool when the first input
-is a tensor or a tensor whose data type isfloat16, float32, int32 or bool.
+is a tensor or a tensor whose data type is float16, float32, int32 or bool.
 Outputs:
 Tensor, the shape is the same as the one after broadcasting,
@@ -3036,7 +3036,7 @@ class IsInf(PrimitiveWithInfer):
 class IsFinite(PrimitiveWithInfer):
 """
-Deternubes which elements are finite for each position.
+Determines which elements are finite for each position.
 Inputs:
 - **input_x** (Tensor) - The input tensor.
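
A quick sketch for the IsFinite hunk: NaN and infinity map to False, ordinary values to True:

    import numpy as np
    import mindspore.ops as ops
    from mindspore import Tensor

    is_finite = ops.IsFinite()
    x = Tensor(np.array([np.nan, 1.0, np.inf], np.float32))
    print(is_finite(x))  # [False  True False]
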
@@ -3160,6 +3160,8 @@ class NPUGetFloatStatus(PrimitiveWithInfer):
 >>> get_status = ops.NPUGetFloatStatus()
 >>> init = alloc_status()
 >>> get_status(init)
+Tensor(shape=[8], dtype=Float32, value= [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
+0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
 >>> print(init)
 [1. 1. 1. 1. 1. 1. 1. 1.]
 """
@@ -3207,6 +3209,8 @@ class NPUClearFloatStatus(PrimitiveWithInfer):
 >>> init = alloc_status()
 >>> flag = get_status(init)
 >>> clear_status(init)
+Tensor(shape=[8], dtype=Float32, value= [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
+0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
 >>> print(init)
 [1. 1. 1. 1. 1. 1. 1. 1.]
 """
@@ -2013,7 +2013,8 @@ class TopK(PrimitiveWithInfer):
 >>> k = 3
 >>> values, indices = topk(input_x, k)
 >>> print((values, indices))
-([5.0, 4.0, 3.0], [4, 3, 2])
+(Tensor(shape=[3], dtype=Float16, value= [ 5.0000e+00, 4.0000e+00, 3.0000e+00]), Tensor(shape=[3],
+dtype=Int32, value= [4, 3, 2]))
 """
 @prim_attr_register
@@ -2217,7 +2218,7 @@ class ApplyMomentum(PrimitiveWithInfer):
 Data type conversion of Parameter is not supported. RuntimeError exception will be thrown.
 Args:
-use_locking (bool): Whether to enable a lock to protect the variable and accumlation tensors
+use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
 from being updated. Default: False.
 use_nesterov (bool): Enable Nesterov momentum. Default: False.
 gradient_scale (float): The scale of the gradient. Default: 1.0.
@@ -2513,7 +2514,7 @@ class SGD(PrimitiveWithCheck):
 >>> stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mindspore.float32)
 >>> output = sgd(parameters, gradient, learning_rate, accum, momentum, stat)
 >>> print(output[0])
-[ 1.9899 -0.4903 1.6952001 3.9801 ]
+(Tensor(shape=[4], dtype=Float32, value= [ 1.98989999e+00, -4.90300000e-01, 1.69520009e+00, 3.98009992e+00]),)
 """
 @prim_attr_register
@@ -5759,7 +5760,7 @@ class ApplyProximalGradientDescent(PrimitiveWithInfer):
 Inputs:
 - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
-- **alpha** (Union[Number, Tensor]) - Saling factor, must be a scalar. With float32 or float16 data type.
+- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
 - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be scalar.
 With float32 or float16 data type.
 - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be scalar.
@@ -5951,7 +5952,7 @@ class ApplyFtrl(PrimitiveWithInfer):
 There is only one output for GPU environment.
-- **var** (Tensor) - This value is alwalys zero and the input parameters has been updated in-place.
+- **var** (Tensor) - This value is always zero and the input parameters has been updated in-place.
 Supported Platforms:
 ``Ascend`` ``GPU``
@@ -5994,7 +5995,7 @@ class ApplyFtrl(PrimitiveWithInfer):
 [ 1.43758726e+00, 9.89177322e+00]]), Tensor(shape=[2, 2], dtype=Float32, value=
 [[-1.86994812e+03, -1.64906018e+03],
 [-3.22187836e+02, -1.20163989e+03]]))
->>> else:
+... else:
 ... print(net.var.asnumpy())
 [[0.4614181 0.5309642 ]
 [0.2687151 0.38206503]]
@@ -6321,6 +6322,7 @@ class CTCLoss(PrimitiveWithInfer):
 ``Ascend`` ``GPU``
 Examples:
+>>> np.random.seed(0)
 >>> inputs = Tensor(np.random.random((2, 2, 3)), mindspore.float32)
 >>> labels_indices = Tensor(np.array([[0, 0], [1, 0]]), mindspore.int64)
 >>> labels_values = Tensor(np.array([2, 2]), mindspore.int32)
@@ -6328,12 +6330,12 @@ class CTCLoss(PrimitiveWithInfer):
 >>> ctc_loss = ops.CTCLoss()
 >>> loss, gradient = ctc_loss(inputs, labels_indices, labels_values, sequence_length)
 >>> print(loss)
-[ 0.69121575 0.5381993 ]
+[ 0.7864997 0.720426 ]
 >>> print(gradient)
-[[[ 0.25831494 0.3623634 -0.62067937 ]
-[ 0.25187883 0.2921483 -0.5440271 ]]
-[[ 0.43522435 0.24408469 0.07787037 ]
-[ 0.29642645 0.4232373 0.06138104 ]]]
+[[[ 0.30898064 0.36491138 -0.673892 ]
+[ 0.33421117 0.2960548 -0.63026595 ]]
+[[ 0.23434742 0.36907154 0.11261538 ]
+[ 0.27316454 0.41090325 0.07584976 ]]]
 """
@@ -564,7 +564,7 @@ class PopulationCount(PrimitiveWithInfer):
 - **input** (Tensor) - The data type must be int16 or uint16.
 Outputs:
-Tensor, with the sam shape as the input.
+Tensor, with the same shape as the input.
 Supported Platforms:
 ``Ascend``
@@ -513,11 +513,11 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs):
 file_name (str): File name of the model to be exported.
 file_format (str): MindSpore currently supports 'AIR', 'ONNX' and 'MINDIR' format for exported model.
-- AIR: Ascend Intermidiate Representation. An intermidiate representation format of Ascend model.
+- AIR: Ascend Intermediate Representation. An intermediate representation format of Ascend model.
 Recommended suffix for output file is '.air'.
 - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models.
 Recommended suffix for output file is '.onnx'.
-- MINDIR: MindSpore Native Intermidiate Representation for Anf. An intermidiate representation format
+- MINDIR: MindSpore Native Intermediate Representation for Anf. An intermediate representation format
 for MindSpore models.
 Recommended suffix for output file is '.mindir'.
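
A closing sketch of the export call documented above, using a trivial cell in place of a trained network (the '.mindir' suffix follows the recommendation in the hunk):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor
    from mindspore.train.serialization import export

    net = nn.ReLU()  # any trained Cell works here
    input_data = Tensor(np.ones([1, 3, 224, 224], np.float32))
    export(net, input_data, file_name="relu.mindir", file_format="MINDIR")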