update doc string in distribution and bijector classes

5 years ago · cb1e02fcd9
--- a/mindspore/nn/probability/bijector/bijector.py
+++ b/mindspore/nn/probability/bijector/bijector.py
@@ -20,6 +20,7 @@ from ..distribution._utils.utils import CheckTensor
 from ..distribution import Distribution
 from ..distribution import TransformedDistribution


 class Bijector(Cell):
    """
    Bijector class.
@@ -28,22 +29,23 @@ class Bijector(Cell):
        is_constant_jacobian (bool): Whether the Bijector has constant derivative. Default: False.
        is_injective (bool): Whether the Bijector is a one-to-one mapping. Default: True.
        name (str): The name of the Bijector. Default: None.
        dtype (mindspore.dtype): The type of the distribution the Bijector can operate on. Default: None.
        dtype (mindspore.dtype): The type of the distributions that the Bijector can operate on. Default: None.
        param (dict): The parameters used to initialize the Bijector. Default: None.
    """

    def __init__(self,
                 is_constant_jacobian=False,
                 is_injective=True,
                 name=None,
                 dtype=None,
                 param=None):

        """
        Constructor of bijector class.
        Constructor of Bijector class.
        """
        super(Bijector, self).__init__()
        validator.check_value_type('name', name, [str], type(self).__name__)
        validator.check_value_type('is_constant_jacobian', is_constant_jacobian, [bool], name)
        validator.check_value_type(
            'is_constant_jacobian', is_constant_jacobian, [bool], name)
        validator.check_value_type('is_injective', is_injective, [bool], name)
        self._name = name
        self._dtype = dtype
--- a/mindspore/nn/probability/bijector/exp.py
+++ b/mindspore/nn/probability/bijector/exp.py
@@ -15,6 +15,7 @@
 """Power Bijector"""
 from .power_transform import PowerTransform


 class Exp(PowerTransform):
    r"""
    Exponential Bijector.
@@ -27,24 +28,25 @@ class Exp(PowerTransform):
        name (str): The name of the Bijector. Default: 'Exp'.

    Examples:
        >>> # To initialize an Exp bijector
        >>> # To initialize an Exp bijector.
        >>> import mindspore.nn.probability.bijector as msb
        >>> n = msb.Exp()
        >>>
        >>> # To use Exp bijector in a network
        >>> # To use an Exp bijector in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.e1 = msb.Exp()
        >>>
        >>>     def construct(self, value):
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'forward' with the name of the function
        >>>         # Similar calls can be made to other functions
        >>>         # by replacing `forward` by the name of the function.
        >>>         ans1 = self.e1.forward(value)
        >>>         ans2 = self.e1.inverse(value)
        >>>         ans3 = self.e1.forward_log_jacobian(value)
        >>>         ans4 = self.e1.inverse_log_jacobian(value)
    """

    def __init__(self,
                 name='Exp'):
        param = dict(locals())
--- a/mindspore/nn/probability/bijector/power_transform.py
+++ b/mindspore/nn/probability/bijector/power_transform.py
@@ -31,10 +31,10 @@ class PowerTransform(Bijector):

    The power transform maps inputs from `[-1/c, inf]` to `[0, inf]`.

    This Bijector is equivalent to the `Exp` bijector when `c=0`
    This Bijector is equivalent to the `Exp` bijector when `c=0`.

    Raises:
        ValueError: If the power is less than 0 or is not known statically.
        ValueError: When the power is less than 0 or is not known statically.

    Args:
        power (int or float): The scale factor. Default: 0.
@@ -45,19 +45,19 @@ class PowerTransform(Bijector):
          Default: None.

    Examples:
        >>> # To initialize a PowerTransform bijector of power 0.5
        >>> # To initialize a PowerTransform bijector of power 0.5.
        >>> import mindspore.nn.probability.bijector as msb
        >>> n = msb.PowerTransform(0.5)
        >>>
        >>> # To use PowerTransform distribution in a network
        >>> # To use a PowerTransform bijector in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.p1 = msb.PowerTransform(0.5)
        >>>
        >>>     def construct(self, value):
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'forward' with the name of the function
        >>>         # Similar calls can be made to other functions
        >>>         # by replacing 'forward' by the name of the function.
        >>>         ans1 = self.p1.forward(value)
        >>>         ans2 = self.p1.inverse(value)
        >>>         ans3 = self.p1.forward_log_jacobian(value)
--- a/mindspore/nn/probability/bijector/scalar_affine.py
+++ b/mindspore/nn/probability/bijector/scalar_affine.py
@@ -35,18 +35,18 @@ class ScalarAffine(Bijector):
        name (str): The name of the bijector. Default: 'ScalarAffine'.

    Examples:
        >>> # To initialize a ScalarAffine bijector of scale 1 and shift 2
        >>> # To initialize a ScalarAffine bijector of scale 1 and shift 2.
        >>> scalaraffine = nn.probability.bijector.ScalarAffine(1, 2)
        >>>
        >>> # To use ScalarAffine bijector in a network
        >>> # To use a ScalarAffine bijector in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.s1 = nn.probability.bijector.ScalarAffine(1, 2)
        >>>
        >>>     def construct(self, value):
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'forward' with the name of the function
        >>>         # Similar calls can be made to other functions
        >>>         # by replacing 'forward' by the name of the function.
        >>>         ans1 = self.s1.forward(value)
        >>>         ans2 = self.s1.inverse(value)
        >>>         ans3 = self.s1.forward_log_jacobian(value)
@@ -58,11 +58,13 @@ class ScalarAffine(Bijector):
                 shift=0.0,
                 name='ScalarAffine'):
        """
        Constructor of scalar affine Bijector.
        Constructor of ScalarAffine Bijector.
        """
        param = dict(locals())
        validator.check_value_type('scale', scale, [int, float], type(self).__name__)
        validator.check_value_type('shift', shift, [int, float], type(self).__name__)
        validator.check_value_type(
            'scale', scale, [int, float], type(self).__name__)
        validator.check_value_type(
            'shift', shift, [int, float], type(self).__name__)
        self._scale = cast_to_tensor(scale)
        self._shift = cast_to_tensor(shift)
        super(ScalarAffine, self).__init__(
--- a/mindspore/nn/probability/bijector/softplus.py
+++ b/mindspore/nn/probability/bijector/softplus.py
@@ -37,18 +37,18 @@ class Softplus(Bijector):
        name (str): The name of the Bijector. Default: 'Softplus'.

    Examples:
        >>> # To initialize a Softplus bijector of sharpness 2
        >>> # To initialize a Softplus bijector of sharpness 2.
        >>> softplus = nn.probability.bijector.Softplus(2)
        >>>
        >>> # To use ScalarAffine bijector in a network
        >>> # To use a Softplus bijector in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.sp1 = nn.probability.bijector.Softplus(2)
        >>>
        >>>     def construct(self, value):
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'forward' with the name of the function
        >>>         # Similar calls can be made to other functions
        >>>         # by replacing 'forward' by the name of the function.
        >>>         ans1 = self.sp1.forward(value)
        >>>         ans2 = self.sp1.inverse(value)
        >>>         ans3 = self.sp1.forward_log_jacobian(value)
@@ -58,8 +58,12 @@ class Softplus(Bijector):
    def __init__(self,
                 sharpness=1.0,
                 name='Softplus'):
        """
        Constructor of Softplus Bijector.
        """
        param = dict(locals())
        validator.check_value_type('sharpness', sharpness, [int, float], type(self).__name__)
        validator.check_value_type('sharpness', sharpness,
                                   [int, float], type(self).__name__)
        super(Softplus, self).__init__(name=name, param=param)
        self._sharpness = cast_to_tensor(sharpness)

--- a/mindspore/nn/probability/distribution/bernoulli.py
+++ b/mindspore/nn/probability/distribution/bernoulli.py
@@ -27,80 +27,80 @@ class Bernoulli(Distribution):

    Args:
        probs (float, list, numpy.ndarray, Tensor, Parameter): The probability that the outcome is 1.
        seed (int): The global seed is used in sampling. Global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the distribution. Default: mstype.int32.
        name (str): The name of the distribution. Default: Bernoulli.
        seed (int): The seed used in sampling. The global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the event samples. Default: mstype.int32.
        name (str): The name of the distribution. Default: 'Bernoulli'.

    Note:
        `probs` should be a proper probability (0 < p < 1).
        dist_spec_args is `probs`.
        `dist_spec_args` is `probs`.

        Examples:
        >>> # To initialize a Bernoulli distribution of prob 0.5
    Examples:
        >>> # To initialize a Bernoulli distribution of the probability 0.5.
        >>> import mindspore.nn.probability.distribution as msd
        >>> b = msd.Bernoulli(0.5, dtype=mstype.int32)
        >>>
        >>> # The following creates two independent Bernoulli distributions
        >>> # The following creates two independent Bernoulli distributions.
        >>> b = msd.Bernoulli([0.5, 0.5], dtype=mstype.int32)
        >>>
        >>> # A Bernoulli distribution can be initilized without arguments
        >>> # In this case, probs must be passed in through args during function calls.
        >>> # A Bernoulli distribution can be initialized without arguments.
        >>> # In this case, `probs` must be passed in through arguments during function calls.
        >>> b = msd.Bernoulli(dtype=mstype.int32)
        >>>
        >>> # To use Bernoulli in a network
        >>> # To use the Bernoulli distribution in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.b1 = msd.Bernoulli(0.5, dtype=mstype.int32)
        >>>         self.b2 = msd.Bernoulli(dtype=mstype.int32)
        >>>
        >>>     # All the following calls in construct are valid
        >>>     # All the following calls in construct are valid.
        >>>     def construct(self, value, probs_b, probs_a):
        >>>
        >>>         # Private interfaces of probability functions corresponding to public interfaces, including
        >>>         # 'prob', 'log_prob', 'cdf', 'log_cdf', 'survival_function', 'log_survival', have the form:
        >>>         # `prob`, `log_prob`, `cdf`, `log_cdf`, `survival_function`, and `log_survival`, are the same as follows.
        >>>         # Args:
        >>>         #     value (Tensor): value to be evaluated.
        >>>         #     probs1 (Tensor): probability of success. Default: self.probs.
        >>>         #     value (Tensor): the value to be evaluated.
        >>>         #     probs1 (Tensor): the probability of success. Default: self.probs.
        >>>
        >>>         # Example of prob.
        >>>         # Examples of `prob`.
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'prob' with the name of the function
        >>>         # by replacing `prob` by the name of the function.
        >>>         ans = self.b1.prob(value)
        >>>         # Evaluate with the respect to distribution b
        >>>         # Evaluate `prob` with respect to distribution b.
        >>>         ans = self.b1.prob(value, probs_b)
        >>>         # probs must be passed in during function calls
        >>>         # `probs` must be passed in during function calls.
        >>>         ans = self.b2.prob(value, probs_a)
        >>>
        >>>
        >>>         # Functions 'sd', 'var', 'entropy' have the same args.
        >>>         # Functions `mean`, `sd`, `var`, and `entropy` have the same arguments.
        >>>         # Args:
        >>>         #     probs1 (Tensor): probability of success. Default: self.probs.
        >>>         #     probs1 (Tensor): the probability of success. Default: self.probs.
        >>>
        >>>         # Example of mean. sd, var have similar usage.
        >>>         # Examples of `mean`. `sd`, `var`, and `entropy` are similar.
        >>>         ans = self.b1.mean() # return 0.5
        >>>         ans = self.b1.mean(probs_b) # return probs_b
        >>>         # probs must be passed in during function calls
        >>>         # `probs` must be passed in during function calls.
        >>>         ans = self.b2.mean(probs_a)
        >>>
        >>>
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are similar:
        >>>         # Interfaces of `kl_loss` and `cross_entropy` are the same as follows:
        >>>         # Args:
        >>>         #     dist (str): name of the distribution. Only 'Bernoulli' is supported.
        >>>         #     probs1_b (Tensor): probability of success of distribution b.
        >>>         #     probs1_a (Tensor): probability of success of distribution a. Default: self.probs.
        >>>         #     dist (str): the name of the distribution. Only 'Bernoulli' is supported.
        >>>         #     probs1_b (Tensor): the probability of success of distribution b.
        >>>         #     probs1_a (Tensor): the probability of success of distribution a. Default: self.probs.
        >>>
        >>>         # Example of kl_loss (cross_entropy is similar):
        >>>         # Examples of `kl_loss`. `cross_entropy` is similar.
        >>>         ans = self.b1.kl_loss('Bernoulli', probs_b)
        >>>         ans = self.b1.kl_loss('Bernoulli', probs_b, probs_a)
        >>>         # Additional probs_a must be passed in
        >>>         # An additional `probs_a` must be passed in.
        >>>         ans = self.b2.kl_loss('Bernoulli', probs_b, probs_a)
        >>>
        >>>
        >>>         # sample
        >>>         # Examples of `sample`.
        >>>         # Args:
        >>>         #     shape (tuple): shape of the sample. Default: ()
        >>>         #     probs1 (Tensor): probability of success. Default: self.probs.
        >>>         #     shape (tuple): the shape of the sample. Default: ().
        >>>         #     probs1 (Tensor): the probability of success. Default: self.probs.
        >>>         ans = self.b1.sample()
        >>>         ans = self.b1.sample((2,3))
        >>>         ans = self.b1.sample((2,3), probs_b)
@@ -113,7 +113,7 @@ class Bernoulli(Distribution):
                 dtype=mstype.int32,
                 name="Bernoulli"):
        """
        Constructor of Bernoulli distribution.
        Constructor of Bernoulli.
        """
        param = dict(locals())
        valid_dtype = mstype.int_type + mstype.uint_type + mstype.float_type
@@ -200,7 +200,7 @@ class Bernoulli(Distribution):

    def _cross_entropy(self, dist, probs1_b, probs1=None):
        """
        Evaluate cross_entropy between Bernoulli distributions.
        Evaluate cross entropy between Bernoulli distributions.

        Args:
            dist (str): The type of the distributions. Should be "Bernoulli" in this case.
@@ -212,7 +212,7 @@ class Bernoulli(Distribution):

    def _log_prob(self, value, probs1=None):
        r"""
        pmf of Bernoulli distribution.
        Log probability mass function of Bernoulli distributions.

        Args:
            value (Tensor): A Tensor composed of only zeros and ones.
@@ -230,7 +230,7 @@ class Bernoulli(Distribution):

    def _cdf(self, value, probs1=None):
        r"""
        Cumulative distribution function (cdf) of Bernoulli distribution.
        Cumulative distribution function (cdf) of Bernoulli distributions.

        Args:
            value (Tensor): The value to be evaluated.
--- a/mindspore/nn/probability/distribution/distribution.py
+++ b/mindspore/nn/probability/distribution/distribution.py
@@ -19,7 +19,7 @@ from mindspore._checkparam import Validator as validator
 from mindspore._checkparam import Rel
 from mindspore.common import get_seed
 from ._utils.utils import calc_broadcast_shape_from_param, check_scalar_from_param, cast_type_for_device,\
                          raise_none_error
    raise_none_error
 from ._utils.utils import CheckTuple, CheckTensor


@@ -28,24 +28,24 @@ class Distribution(Cell):
    Base class for all mathematical distributions.

    Args:
        seed (int): The global seed is used in sampling. Global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the event samples. Default: subclass dtype.
        name (str): Python string name prefixed to operations created by this class. Default: subclass name.
        seed (int): The seed is used in sampling. The global seed is used if it is None.
        dtype (mindspore.dtype): The type of the event samples.
        name (str): The name of the distribution.
        param (dict): The parameters used to initialize the distribution.

    Note:
        Derived class should override operations such as `_mean`, `_prob`,
        and `_log_prob`. Required arguments, such as value for `_prob`,
        should be passed in through `args` or `kwargs`. dist_spec_args which specify
        and `_log_prob`. Required arguments, such as `value` for `_prob`,
        should be passed in through `args` or `kwargs`. `dist_spec_args` which specifies
        a new distribution are optional.

        dist_spec_args are unique for each type of distribution. For example, `mean` and `sd`
        are the dist_spec_args for a Normal distribution, while `rate` is the dist_spec_args
        for exponential distribution.
        `dist_spec_args` is unique for each type of distribution. For example, `mean` and `sd`
        are the `dist_spec_args` for a Normal distribution, while `rate` is the `dist_spec_args`
        for an Exponential distribution.

        For all functions, passing in dist_spec_args, is optional.
        Passing in the additional dist_spec_args will evaluate the result to be evaluated with
        new distribution specified by the dist_spec_args. But it will not change the original distribution.
        For all functions, passing in `dist_spec_args` is optional.
        Function calls with the additional `dist_spec_args` passed in will evaluate the result with
        a new distribution specified by the `dist_spec_args`. However, it will not change the original distribution.
    """

    def __init__(self,
@@ -118,9 +118,9 @@ class Distribution(Cell):

    def _check_param_type(self, *args):
        """
        Check the availability and validity of default parameters and dist_spec_args.
        dist_spec_args passed in must be tensors. If default parameter of the distribution
        is None, its parameter must be passed in through `args`.
        Check the availability and validity of default parameters and `dist_spec_args`.
        `dist_spec_args` passed in must be tensors. If default parameters of the distribution
        are None, the parameters must be passed in through `args`.
        """
        broadcast_shape = None
        common_dtype = None
@@ -134,7 +134,8 @@ class Distribution(Cell):
                else:
                    arg = self.checktensor(arg, name)
            else:
                arg = default if default is not None else raise_none_error(name)
                arg = default if default is not None else raise_none_error(
                    name)

            # broadcast if the number of args > 1
            if broadcast_shape is None:
--- a/mindspore/nn/probability/distribution/exponential.py
+++ b/mindspore/nn/probability/distribution/exponential.py
@@ -21,87 +21,88 @@ from .distribution import Distribution
 from ._utils.utils import cast_to_tensor, check_greater_zero, check_type, check_distribution_name, set_param_type
 from ._utils.custom_ops import exp_generic, log_generic


 class Exponential(Distribution):
    """
    Example class: Exponential Distribution.

    Args:
        rate (float, list, numpy.ndarray, Tensor, Parameter): The inverse scale.
        seed (int): The seed used in sampling. Global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the distribution. Default: mstype.float32.
        name (str): The name of the distribution. Default: Exponential.
        seed (int): The seed used in sampling. The global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the event samples. Default: mstype.float32.
        name (str): The name of the distribution. Default: 'Exponential'.

    Note:
        `rate` should be strictly greater than 0.
        dist_spec_args is `rate`.
        `dtype` should be float type because Exponential distributions are continuous.
        `dist_spec_args` is `rate`.
        `dtype` should be a float type because Exponential distributions are continuous.

        Examples:
        >>> # To initialize an Exponential distribution of rate 0.5
    Examples:
        >>> # To initialize an Exponential distribution of the rate 0.5.
        >>> import mindspore.nn.probability.distribution as msd
        >>> e = msd.Exponential(0.5, dtype=mstype.float32)
        >>>
        >>> # The following creates two independent Exponential distributions
        >>> # The following creates two independent Exponential distributions.
        >>> e = msd.Exponential([0.5, 0.5], dtype=mstype.float32)
        >>>
        >>> # An Exponential distribution can be initilized without arguments
        >>> # In this case, rate must be passed in through args during function calls
        >>> # An Exponential distribution can be initialized without arguments.
        >>> # In this case, `rate` must be passed in through `args` during function calls.
        >>> e = msd.Exponential(dtype=mstype.float32)
        >>>
        >>> # To use Exponential in a network
        >>> # To use an Exponential distribution in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.e1 = msd.Exponential(0.5, dtype=mstype.float32)
        >>>         self.e2 = msd.Exponential(dtype=mstype.float32)
        >>>
        >>>     # All the following calls in construct are valid
        >>>     # All the following calls in construct are valid.
        >>>     def construct(self, value, rate_b, rate_a):
        >>>
        >>>         # Private interfaces of probability functions corresponding to public interfaces, including
        >>>         # 'prob', 'log_prob', 'cdf', 'log_cdf', 'survival_function', 'log_survival', have the form:
        >>>         # `prob`, `log_prob`, `cdf`, `log_cdf`, `survival_function`, and `log_survival`, are the same as follows.
        >>>         # Args:
        >>>         #     value (Tensor): value to be evaluated.
        >>>         #     rate (Tensor): rate of the distribution. Default: self.rate.
        >>>         #     value (Tensor): the value to be evaluated.
        >>>         #     rate (Tensor): the rate of the distribution. Default: self.rate.
        >>>
        >>>         # Example of prob.
        >>>         # Examples of `prob`.
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'prob' with the name of the function
        >>>         # by replacing `prob` by the name of the function.
        >>>         ans = self.e1.prob(value)
        >>>         # Evaluate with the respect to distribution b
        >>>         # Evaluate with respect to distribution b.
        >>>         ans = self.e1.prob(value, rate_b)
        >>>         # Rate must be passed in during function calls
        >>>         # `rate` must be passed in during function calls.
        >>>         ans = self.e2.prob(value, rate_a)
        >>>
        >>>
        >>>         # Functions 'sd', 'var', 'entropy' have the same args.
        >>>         # Functions `mean`, `sd`, `var`, and `entropy` have the same arguments as follows.
        >>>         # Args:
        >>>         #     rate (Tensor): rate of the distribution. Default: self.rate.
        >>>         #     rate (Tensor): the rate of the distribution. Default: self.rate.
        >>>
        >>>         # Example of mean. sd, var have similar usage.
        >>>         # Examples of `mean`. `sd`, `var`, and `entropy` are similar.
        >>>         ans = self.e1.mean() # return 2
        >>>         ans = self.e1.mean(rate_b) # return 1 / rate_b
        >>>         # Rate must be passed in during function calls
        >>>         # `rate` must be passed in during function calls.
        >>>         ans = self.e2.mean(rate_a)
        >>>
        >>>
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are similar:
        >>>         # Interfaces of `kl_loss` and `cross_entropy` are the same.
        >>>         # Args:
        >>>         #     dist (str): name of the distribution. Only 'Exponential' is supported.
        >>>         #     rate_b (Tensor): rate of distribution b.
        >>>         #     rate_a (Tensor): rate of distribution a. Default: self.rate.
        >>>         #     dist (str): The name of the distribution. Only 'Exponential' is supported.
        >>>         #     rate_b (Tensor): the rate of distribution b.
        >>>         #     rate_a (Tensor): the rate of distribution a. Default: self.rate.
        >>>
        >>>         # Example of kl_loss (cross_entropy is similar):
        >>>         # Examples of `kl_loss`. `cross_entropy` is similar.
        >>>         ans = self.e1.kl_loss('Exponential', rate_b)
        >>>         ans = self.e1.kl_loss('Exponential', rate_b, rate_a)
        >>>         # Additional rate must be passed in
        >>>         # An additional `rate` must be passed in.
        >>>         ans = self.e2.kl_loss('Exponential', rate_b, rate_a)
        >>>
        >>>
        >>>         # sample
        >>>         # Examples of `sample`.
        >>>         # Args:
        >>>         #     shape (tuple): shape of the sample. Default: ()
        >>>         #     probs1 (Tensor): rate of distribution. Default: self.rate.
        >>>         #     shape (tuple): the shape of the sample. Default: ().
        >>>         #     rate (Tensor): the rate of the distribution. Default: self.rate.
        >>>         ans = self.e1.sample()
        >>>         ans = self.e1.sample((2,3))
        >>>         ans = self.e1.sample((2,3), rate_b)
@@ -114,7 +115,7 @@ class Exponential(Distribution):
                 dtype=mstype.float32,
                 name="Exponential"):
        """
        Constructor of Exponential distribution.
        Constructor of Exponential.
        """
        param = dict(locals())
        valid_dtype = mstype.float_type
@@ -132,7 +133,6 @@ class Exponential(Distribution):

        self.minval = np.finfo(np.float).tiny


        # ops needed for the class
        self.exp = exp_generic
        self.log = log_generic
@@ -148,7 +148,6 @@ class Exponential(Distribution):
        self.sq = P.Square()
        self.uniform = C.uniform


    def extend_repr(self):
        if self.is_scalar_batch:
            str_info = f'rate = {self.rate}'
@@ -197,7 +196,7 @@ class Exponential(Distribution):

    def _cross_entropy(self, dist, rate_b, rate=None):
        """
        Evaluate cross_entropy between Exponential distributions.
        Evaluate cross entropy between Exponential distributions.

        Args:
            dist (str): The type of the distributions. Should be "Exponential" in this case.
@@ -207,10 +206,9 @@ class Exponential(Distribution):
        check_distribution_name(dist, 'Exponential')
        return self._entropy(rate) + self._kl_loss(dist, rate_b, rate)


    def _log_prob(self, value, rate=None):
        r"""
        log_pdf of Exponential distribution.
        Log probability density function of Exponential distributions.

        Args:
            Args:
@@ -234,7 +232,7 @@ class Exponential(Distribution):

    def _cdf(self, value, rate=None):
        r"""
        Cumulative distribution function (cdf) of Exponential distribution.
        Cumulative distribution function (cdf) of Exponential distributions.

        Args:
            value (Tensor): The value to be evaluated.
@@ -256,7 +254,7 @@ class Exponential(Distribution):

    def _log_survival(self, value, rate=None):
        r"""
        log survival_function of Exponential distribution.
        Log survival_function of Exponential distributions.

        Args:
            value (Tensor): The value to be evaluated.
--- a/mindspore/nn/probability/distribution/geometric.py
+++ b/mindspore/nn/probability/distribution/geometric.py
@@ -19,7 +19,7 @@ from mindspore.ops import composite as C
 from mindspore.common import dtype as mstype
 from .distribution import Distribution
 from ._utils.utils import cast_to_tensor, check_prob, check_type, check_distribution_name,\
                          set_param_type
    set_param_type
 from ._utils.custom_ops import exp_generic, log_generic


@@ -32,79 +32,79 @@ class Geometric(Distribution):
    Args:
        probs (float, list, numpy.ndarray, Tensor, Parameter): The probability of success.
        seed (int): The seed used in sampling. Global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the distribution. Default: mstype.int32.
        name (str): The name of the distribution. Default: Geometric.
        dtype (mindspore.dtype): The type of the event samples. Default: mstype.int32.
        name (str): The name of the distribution. Default: 'Geometric'.

    Note:
        `probs` should be a proper probability (0 < p < 1).
        dist_spec_args is `probs`.
        `dist_spec_args` is `probs`.

        Examples:
        >>> # To initialize a Geometric distribution of prob 0.5
    Examples:
        >>> # To initialize a Geometric distribution of the probability 0.5.
        >>> import mindspore.nn.probability.distribution as msd
        >>> n = msd.Geometric(0.5, dtype=mstype.int32)
        >>>
        >>> # The following creates two independent Geometric distributions
        >>> # The following creates two independent Geometric distributions.
        >>> n = msd.Geometric([0.5, 0.5], dtype=mstype.int32)
        >>>
        >>> # A Geometric distribution can be initilized without arguments
        >>> # In this case, probs must be passed in through args during function calls.
        >>> # A Geometric distribution can be initialized without arguments.
        >>> # In this case, `probs` must be passed in through arguments during function calls.
        >>> n = msd.Geometric(dtype=mstype.int32)
        >>>
        >>> # To use Geometric in a network
        >>> # To use a Geometric distribution in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.g1 = msd.Geometric(0.5, dtype=mstype.int32)
        >>>         self.g2 = msd.Geometric(dtype=mstype.int32)
        >>>
        >>>     # Tthe following calls are valid in construct
        >>>     # The following calls are valid in construct.
        >>>     def construct(self, value, probs_b, probs_a):
        >>>
        >>>         # Private interfaces of probability functions corresponding to public interfaces, including
        >>>         # 'prob', 'log_prob', 'cdf', 'log_cdf', 'survival_function', 'log_survival', have the form:
        >>>         # `prob`, `log_prob`, `cdf`, `log_cdf`, `survival_function`, and `log_survival`, have the same arguments as follows.
        >>>         # Args:
        >>>         #     value (Tensor): value to be evaluated.
        >>>         #     probs1 (Tensor): probability of success of a Bernoulli trail. Default: self.probs.
        >>>         #     value (Tensor): the value to be evaluated.
        >>>         #     probs1 (Tensor): the probability of success of a Bernoulli trial. Default: self.probs.
        >>>
        >>>         # Example of prob.
        >>>         # Examples of `prob`.
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'prob' with the name of the function
        >>>         # by replacing `prob` by the name of the function.
        >>>         ans = self.g1.prob(value)
        >>>         # Evaluate with the respect to distribution b
        >>>         # Evaluate with respect to distribution b.
        >>>         ans = self.g1.prob(value, probs_b)
        >>>         # Probs must be passed in during function calls
        >>>         # `probs` must be passed in during function calls.
        >>>         ans = self.g2.prob(value, probs_a)
        >>>
        >>>
        >>>         # Functions 'sd', 'var', 'entropy' have the same args.
        >>>         # Functions `mean`, `sd`, `var`, and `entropy` have the same arguments.
        >>>         # Args:
        >>>         #     probs1 (Tensor): probability of success of a Bernoulli trail. Default: self.probs.
        >>>         #     probs1 (Tensor): the probability of success of a Bernoulli trial. Default: self.probs.
        >>>
        >>>         # Example of mean. sd, var have similar usage.
        >>>         # Examples of `mean`. `sd`, `var`, and `entropy` are similar.
        >>>         ans = self.g1.mean() # return 1.0
        >>>         ans = self.g1.mean(probs_b)
        >>>         # Probs must be passed in during function calls
        >>>         ans = self.g2.mean(probs_a)
        >>>
        >>>
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are similar:
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are the same.
        >>>         # Args:
        >>>         #     dist (str): name of the distribution. Only 'Geometric' is supported.
        >>>         #     probs1_b (Tensor): probability of success of a Bernoulli trail of distribution b.
        >>>         #     probs1_a (Tensor): probability of success of a Bernoulli trail of distribution a. Default: self.probs.
        >>>         #     dist (str): the name of the distribution. Only 'Geometric' is supported.
        >>>         #     probs1_b (Tensor): the probability of success of a Bernoulli trial of distribution b.
        >>>         #     probs1_a (Tensor): the probability of success of a Bernoulli trial of distribution a. Default: self.probs.
        >>>
        >>>         # Example of kl_loss (cross_entropy is similar):
        >>>         # Examples of `kl_loss`. `cross_entropy` is similar.
        >>>         ans = self.g1.kl_loss('Geometric', probs_b)
        >>>         ans = self.g1.kl_loss('Geometric', probs_b, probs_a)
        >>>         # Additional probs must be passed in
        >>>         # An additional `probs` must be passed in.
        >>>         ans = self.g2.kl_loss('Geometric', probs_b, probs_a)
        >>>
        >>>
        >>>         # sample
        >>>         # Examples of `sample`.
        >>>         # Args:
        >>>         #     shape (tuple): shape of the sample. Default: ()
        >>>         #     probs1 (Tensor): probability of success of a Bernoulli trail. Default: self.probs.
        >>>         #     shape (tuple): the shape of the sample. Default: ()
        >>>         #     probs1 (Tensor): the probability of success of a Bernoulli trial. Default: self.probs.
        >>>         ans = self.g1.sample()
        >>>         ans = self.g1.sample((2,3))
        >>>         ans = self.g1.sample((2,3), probs_b)
@@ -202,7 +202,7 @@ class Geometric(Distribution):

    def _cross_entropy(self, dist, probs1_b, probs1=None):
        r"""
        Evaluate cross_entropy between Geometric distributions.
        Evaluate cross entropy between Geometric distributions.

        Args:
            dist (str): The type of the distributions. Should be "Geometric" in this case.
@@ -214,7 +214,7 @@ class Geometric(Distribution):

    def _prob(self, value, probs1=None):
        r"""
        pmf of Geometric distribution.
        Probability mass function of Geometric distributions.

        Args:
            value (Tensor): A Tensor composed of only natural numbers.
@@ -235,7 +235,7 @@ class Geometric(Distribution):

    def _cdf(self, value, probs1=None):
        r"""
        Cumulative distribution function (cdf) of Geometric distribution.
        Cumulative distribution function (cdf) of Geometric distributions.

        Args:
            value (Tensor): A Tensor composed of only natural numbers.
@@ -285,7 +285,7 @@ class Geometric(Distribution):
            probs (Tensor): The probability of success. Default: self.probs.

        Returns:
            Tensor, shape is shape + batch_shape.
            Tensor, with the shape being shape + batch_shape.
        """
        shape = self.checktuple(shape, 'shape')
        probs1 = self._check_param_type(probs1)
--- a/mindspore/nn/probability/distribution/normal.py
+++ b/mindspore/nn/probability/distribution/normal.py
@@ -19,9 +19,10 @@ from mindspore.ops import composite as C
 from mindspore.common import dtype as mstype
 from .distribution import Distribution
 from ._utils.utils import cast_to_tensor, check_greater_zero, check_type, check_distribution_name,\
                          set_param_type
    set_param_type
 from ._utils.custom_ops import exp_generic, expm1_generic, log_generic, erf_generic


 class Normal(Distribution):
    """
    Normal distribution.
@@ -29,85 +30,85 @@ class Normal(Distribution):
    Args:
        mean (int, float, list, numpy.ndarray, Tensor, Parameter): The mean of the Normal distribution.
        sd (int, float, list, numpy.ndarray, Tensor, Parameter): The standard deviation of the Normal distribution.
        seed (int): The seed used in sampling. Global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the distribution. Default: mstype.float32.
        name (str): The name of the distribution. Default: Normal.
        seed (int): The seed used in sampling. The global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the event samples. Default: mstype.float32.
        name (str): The name of the distribution. Default: 'Normal'.

    Note:
        `sd` should be greater than zero.
        dist_spec_args are `mean` and `sd`.
        `dtype` should be float type because Normal distributions are continuous.
        `dist_spec_args` are `mean` and `sd`.
        `dtype` should be a float type because Normal distributions are continuous.

        Examples:
        >>> # To initialize a Normal distribution of mean 3.0 and standard deviation 4.0
    Examples:
        >>> # To initialize a Normal distribution of the mean 3.0 and the standard deviation 4.0.
        >>> import mindspore.nn.probability.distribution as msd
        >>> n = msd.Normal(3.0, 4.0, dtype=mstype.float32)
        >>>
        >>> # The following creates two independent Normal distributions
        >>> # The following creates two independent Normal distributions.
        >>> n = msd.Normal([3.0, 3.0], [4.0, 4.0], dtype=mstype.float32)
        >>>
        >>> # A Normal distribution can be initilize without arguments
        >>> # In this case, mean and sd must be passed in through args.
        >>> # A Normal distribution can be initialized without arguments.
        >>> # In this case, `mean` and `sd` must be passed in through arguments.
        >>> n = msd.Normal(dtype=mstype.float32)
        >>>
        >>> # To use Normal in a network
        >>> # To use a Normal distribution in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.n1 = msd.Normal(0.0, 1.0, dtype=mstype.float32)
        >>>         self.n2 = msd.Normal(dtype=mstype.float32)
        >>>
        >>>     # The following calls are valid in construct
        >>>     # The following calls are valid in construct.
        >>>     def construct(self, value, mean_b, sd_b, mean_a, sd_a):
        >>>
        >>>         # Private interfaces of probability functions corresponding to public interfaces, including
        >>>         # 'prob', 'log_prob', 'cdf', 'log_cdf', 'survival_function', 'log_survival', have the form:
        >>>         # `prob`, `log_prob`, `cdf`, `log_cdf`, `survival_function`, and `log_survival`, have the same arguments as follows.
        >>>         # Args:
        >>>         #     value (Tensor): value to be evaluated.
        >>>         #     mean (Tensor): mean of distribution. Default: self._mean_value.
        >>>         #     sd (Tensor): standard deviation of distribution. Default: self._sd_value.
        >>>         #     value (Tensor): the value to be evaluated.
        >>>         #     mean (Tensor): the mean of distribution. Default: self._mean_value.
        >>>         #     sd (Tensor): the standard deviation of distribution. Default: self._sd_value.
        >>>
        >>>         # Example of prob.
        >>>         # Examples of `prob`.
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'prob' with the name of the function
        >>>         # by replacing 'prob' by the name of the function
        >>>         ans = self.n1.prob(value)
        >>>         # Evaluate with the respect to distribution b
        >>>         # Evaluate with respect to distribution b.
        >>>         ans = self.n1.prob(value, mean_b, sd_b)
        >>>         # mean and sd must be passed in during function calls
        >>>         # `mean` and `sd` must be passed in during function calls
        >>>         ans = self.n2.prob(value, mean_a, sd_a)
        >>>
        >>>
        >>>         # Functions 'sd', 'var', 'entropy' have the same args.
        >>>         # Functions `mean`, `sd`, `var`, and `entropy` have the same arguments.
        >>>         # Args:
        >>>         #     mean (Tensor): mean of distribution. Default: self._mean_value.
        >>>         #     sd (Tensor): standard deviation of distribution. Default: self._sd_value.
        >>>         #     mean (Tensor): the mean of distribution. Default: self._mean_value.
        >>>         #     sd (Tensor): the standard deviation of distribution. Default: self._sd_value.
        >>>
        >>>         # Example of mean. sd, var have similar usage.
        >>>         # Example of `mean`. `sd`, `var`, and `entropy` are similar.
        >>>         ans = self.n1.mean() # return 0.0
        >>>         ans = self.n1.mean(mean_b, sd_b) # return mean_b
        >>>         # mean and sd must be passed in during function calls
        >>>         # `mean` and `sd` must be passed in during function calls.
        >>>         ans = self.n2.mean(mean_a, sd_a)
        >>>
        >>>
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are similar:
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are the same:
        >>>         # Args:
        >>>         #     dist (str): type of the distributions. Should be "Normal" in this case.
        >>>         #     mean_b (Tensor): mean of distribution b.
        >>>         #     sd_b (Tensor): standard deviation distribution b.
        >>>         #     mean_a (Tensor): mean of distribution a. Default: self._mean_value.
        >>>         #     sd_a (Tensor): standard deviation distribution a. Default: self._sd_value.
        >>>         #     dist (str): the type of the distributions. Only "Normal" is supported.
        >>>         #     mean_b (Tensor): the mean of distribution b.
        >>>         #     sd_b (Tensor): the standard deviation of distribution b.
        >>>         #     mean_a (Tensor): the mean of distribution a. Default: self._mean_value.
        >>>         #     sd_a (Tensor): the standard deviation of distribution a. Default: self._sd_value.
        >>>
        >>>         # Example of kl_loss (cross_entropy is similar):
        >>>         # Examples of `kl_loss`. `cross_entropy` is similar.
        >>>         ans = self.n1.kl_loss('Normal', mean_b, sd_b)
        >>>         ans = self.n1.kl_loss('Normal', mean_b, sd_b, mean_a, sd_a)
        >>>         # Additional mean and sd must be passed in
        >>>         # Additional `mean` and `sd` must be passed in.
        >>>         ans = self.n2.kl_loss('Normal', mean_b, sd_b, mean_a, sd_a)
        >>>
        >>>         # sample
        >>>         # Examples of `sample`.
        >>>         # Args:
        >>>         #     shape (tuple): shape of the sample. Default: ()
        >>>         #     mean (Tensor): mean of distribution. Default: self._mean_value.
        >>>         #     sd (Tensor): standard deviation of distribution. Default: self._sd_value.
        >>>         #     shape (tuple): the shape of the sample. Default: ()
        >>>         #     mean (Tensor): the mean of the distribution. Default: self._mean_value.
        >>>         #     sd (Tensor): the standard deviation of the distribution. Default: self._sd_value.
        >>>         ans = self.n1.sample()
        >>>         ans = self.n1.sample((2,3))
        >>>         ans = self.n1.sample((2,3), mean_b, sd_b)
@@ -121,25 +122,28 @@ class Normal(Distribution):
                 dtype=mstype.float32,
                 name="Normal"):
        """
        Constructor of normal distribution.
        Constructor of Normal.
        """
        param = dict(locals())
        valid_dtype = mstype.float_type
        check_type(dtype, valid_dtype, type(self).__name__)
        super(Normal, self).__init__(seed, dtype, name, param)
        self.parameter_type = set_param_type({'mean': mean, 'sd': sd}, self.dtype)
        if  mean is not None and sd is not None:
        self.parameter_type = set_param_type(
            {'mean': mean, 'sd': sd}, self.dtype)
        if mean is not None and sd is not None:
            self._mean_value = cast_to_tensor(mean, self.parameter_type)
            self._sd_value = cast_to_tensor(sd, self.parameter_type)
            check_greater_zero(self._sd_value, "Standard deviation")
        else:
            self._mean_value = mean if mean is None else cast_to_tensor(mean, self.parameter_type)
            self._sd_value = sd if sd is None else cast_to_tensor(sd, self.parameter_type)
            self._mean_value = mean if mean is None else cast_to_tensor(
                mean, self.parameter_type)
            self._sd_value = sd if sd is None else cast_to_tensor(
                sd, self.parameter_type)

        self.default_parameters = [self._mean_value, self._sd_value]
        self.parameter_names = ['mean', 'sd']

        #ops needed for the class
        # ops needed for the class
        self.exp = exp_generic
        self.expm1 = expm1_generic
        self.log = log_generic
@@ -195,7 +199,7 @@ class Normal(Distribution):

    def _cross_entropy(self, dist, mean_b, sd_b, mean=None, sd=None):
        r"""
        Evaluate cross_entropy between normal distributions.
        Evaluate cross entropy between normal distributions.

        Args:
            dist (str): Type of the distributions. Should be "Normal" in this case.
@@ -222,13 +226,15 @@ class Normal(Distribution):
        value = self._check_value(value, 'value')
        value = self.cast(value, self.dtype)
        mean, sd = self._check_param_type(mean, sd)
        unnormalized_log_prob = -1. * (self.sq(value - mean)) / (2. * self.sq(sd))
        neg_normalization = -1. * self.log(self.const(2. * np.pi)) / 2. - self.log(sd)
        unnormalized_log_prob = -1. * \
            (self.sq(value - mean)) / (2. * self.sq(sd))
        neg_normalization = -1. * \
            self.log(self.const(2. * np.pi)) / 2. - self.log(sd)
        return unnormalized_log_prob + neg_normalization

    def _cdf(self, value, mean=None, sd=None):
        r"""
        Evaluate cdf of given value.
        Evaluate the cumulative distribution function on the given value.

        Args:
            value (Tensor): The value to be evaluated.
@@ -280,7 +286,7 @@ class Normal(Distribution):
            sd (Tensor): The standard deviation of the samples. Default: self._sd_value.

        Returns:
            Tensor, shape is shape + batch_shape.
            Tensor, with the shape being shape + batch_shape.
        """
        shape = self.checktuple(shape, 'shape')
        mean, sd = self._check_param_type(mean, sd)
--- a/mindspore/nn/probability/distribution/transformed_distribution.py
+++ b/mindspore/nn/probability/distribution/transformed_distribution.py
@@ -20,6 +20,7 @@ from .distribution import Distribution
 from ._utils.utils import check_type, raise_not_impl_error
 from ._utils.custom_ops import exp_generic, log_generic


 class TransformedDistribution(Distribution):
    """
    Transformed Distribution.
@@ -29,7 +30,7 @@ class TransformedDistribution(Distribution):
    Args:
        bijector (Bijector): The transformation to perform.
        distribution (Distribution): The original distribution.
        name (str): The name of the transformed distribution. Default: transformed_distribution.
        name (str): The name of the transformed distribution. Default: 'transformed_distribution'.

    Note:
        The arguments used to initialize the original distribution cannot be None.
@@ -37,15 +38,15 @@ class TransformedDistribution(Distribution):
        TransformedDistribution since `mean` and `sd` are not specified.

    Examples:
        >>> # To initialize a transformed distribution, e.g. lognormal distribution,
        >>> # using Normal distribution as the base distribution, and Exp bijector as the bijector function.
        >>> # To initialize a transformed distribution, e.g. a lognormal distribution,
        >>> # using a Normal distribution as the base distribution, and an Exp bijector as the bijector function.
        >>> import mindspore.nn.probability.distribution as msd
        >>> import mindspore.nn.probability.bijector as msb
        >>> ln = msd.TransformedDistribution(msb.Exp(),
        >>>                                  msd.Normal(0.0, 1.0, dtype=mstype.float32),
        >>>                                  dtype=mstype.float32)
        >>>
        >>> # To use a transformed distribution in a network
        >>> # To use a transformed distribution in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
@@ -54,10 +55,11 @@ class TransformedDistribution(Distribution):
        >>>                                               dtype=mstype.float32)
        >>>
        >>>     def construct(self, value):
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'sample' with the name of the function
        >>>         # Similar calls can be made to other functions
        >>>         # by replacing 'sample' by the name of the function.
        >>>         ans = self.ln.sample(shape=(2, 3))
    """

    def __init__(self,
                 bijector,
                 distribution,
@@ -68,8 +70,10 @@ class TransformedDistribution(Distribution):
        Constructor of transformed_distribution class.
        """
        param = dict(locals())
        validator.check_value_type('bijector', bijector, [nn.probability.bijector.Bijector], type(self).__name__)
        validator.check_value_type('distribution', distribution, [Distribution], type(self).__name__)
        validator.check_value_type('bijector', bijector,
                                   [nn.probability.bijector.Bijector], type(self).__name__)
        validator.check_value_type('distribution', distribution,
                                   [Distribution], type(self).__name__)
        valid_dtype = mstype.number_type
        check_type(dtype, valid_dtype, type(self).__name__)
        super(TransformedDistribution, self).__init__(seed, dtype, name, param)
--- a/mindspore/nn/probability/distribution/uniform.py
+++ b/mindspore/nn/probability/distribution/uniform.py
@@ -18,9 +18,10 @@ from mindspore.ops import composite as C
 from mindspore.common import dtype as mstype
 from .distribution import Distribution
 from ._utils.utils import cast_to_tensor, check_greater, check_type, check_distribution_name,\
                          set_param_type
    set_param_type
 from ._utils.custom_ops import exp_generic, log_generic


 class Uniform(Distribution):
    """
    Example class: Uniform Distribution.
@@ -28,85 +29,85 @@ class Uniform(Distribution):
    Args:
        low (int, float, list, numpy.ndarray, Tensor, Parameter): The lower bound of the distribution.
        high (int, float, list, numpy.ndarray, Tensor, Parameter): The upper bound of the distribution.
        seed (int): The seed uses in sampling. Global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the distribution. Default: mstype.float32.
        name (str): The name of the distribution. Default: Uniform.
        seed (int): The seed used in sampling. The global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the event samples. Default: mstype.float32.
        name (str): The name of the distribution. Default: 'Uniform'.

    Note:
        `low` should be strictly less than `high`.
        dist_spec_args are `high` and `low`.
        `dist_spec_args` are `high` and `low`.
        `dtype` should be float type because Uniform distributions are continuous.

        Examples:
        >>> # To initialize a Uniform distribution of mean 3.0 and standard deviation 4.0
    Examples:
        >>> # To initialize a Uniform distribution of the lower bound 0.0 and the higher bound 1.0.
        >>> import mindspore.nn.probability.distribution as msd
        >>> u = msd.Uniform(0.0, 1.0, dtype=mstype.float32)
        >>>
        >>> # The following creates two independent Uniform distributions
        >>> # The following creates two independent Uniform distributions.
        >>> u = msd.Uniform([0.0, 0.0], [1.0, 2.0], dtype=mstype.float32)
        >>>
        >>> # A Uniform distribution can be initilized without arguments
        >>> # In this case, high and low must be passed in through args during function calls.
        >>> # A Uniform distribution can be initialized without arguments.
        >>> # In this case, `high` and `low` must be passed in through arguments during function calls.
        >>> u = msd.Uniform(dtype=mstype.float32)
        >>>
        >>> # To use Uniform in a network
        >>> # To use a Uniform distribution in a network.
        >>> class net(Cell):
        >>>     def __init__(self):
        >>>         super(net, self).__init__()
        >>>         self.u1 = msd.Uniform(0.0, 1.0, dtype=mstype.float32)
        >>>         self.u2 = msd.Uniform(dtype=mstype.float32)
        >>>
        >>>     # All the following calls in construct are valid
        >>>     # All the following calls in construct are valid.
        >>>     def construct(self, value, low_b, high_b, low_a, high_a):
        >>>
        >>>         # Private interfaces of probability functions corresponding to public interfaces, including
        >>>         # 'prob', 'log_prob', 'cdf', 'log_cdf', 'survival_function', 'log_survival', have the form:
        >>>         # `prob`, `log_prob`, `cdf`, `log_cdf`, `survival_function`, and `log_survival`, have the same arguments.
        >>>         # Args:
        >>>         #     value (Tensor): value to be evaluated.
        >>>         #     low (Tensor): lower bound of distribution. Default: self.low.
        >>>         #     high (Tensor): higher bound of distribution. Default: self.high.
        >>>         #     value (Tensor): the value to be evaluated.
        >>>         #     low (Tensor): the lower bound of distribution. Default: self.low.
        >>>         #     high (Tensor): the higher bound of distribution. Default: self.high.
        >>>
        >>>         # Example of prob.
        >>>         # Examples of `prob`.
        >>>         # Similar calls can be made to other probability functions
        >>>         # by replacing 'prob' with the name of the function
        >>>         # by replacing 'prob' by the name of the function.
        >>>         ans = self.u1.prob(value)
        >>>         # Evaluate with the respect to distribution b
        >>>         # Evaluate with respect to distribution b.
        >>>         ans = self.u1.prob(value, low_b, high_b)
        >>>         # High and low must be passed in during function calls
        >>>         # `high` and `low` must be passed in during function calls.
        >>>         ans = self.u2.prob(value, low_a, high_a)
        >>>
        >>>
        >>>         # Functions 'sd', 'var', 'entropy' have the same args.
        >>>         # Functions `mean`, `sd`, `var`, and `entropy` have the same arguments.
        >>>         # Args:
        >>>         #     low (Tensor): lower bound of distribution. Default: self.low.
        >>>         #     high (Tensor): higher bound of distribution. Default: self.high.
        >>>         #     low (Tensor): the lower bound of distribution. Default: self.low.
        >>>         #     high (Tensor): the higher bound of distribution. Default: self.high.
        >>>
        >>>         # Example of mean. sd, var have similar usage.
        >>>         # Examples of `mean`. `sd`, `var`, and `entropy` are similar.
        >>>         ans = self.u1.mean() # return 0.5
        >>>         ans = self.u1.mean(low_b, high_b) # return (low_b + high_b) / 2
        >>>         # High and low must be passed in during function calls
        >>>         # `high` and `low` must be passed in during function calls.
        >>>         ans = self.u2.mean(low_a, high_a)
        >>>
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are similar:
        >>>         # Interfaces of 'kl_loss' and 'cross_entropy' are the same.
        >>>         # Args:
        >>>         #     dist (str): type of the distributions. Should be "Uniform" in this case.
        >>>         #     low_b (Tensor): lower bound of distribution b.
        >>>         #     high_b (Tensor): upper bound of distribution b.
        >>>         #     low_a (Tensor): lower bound of distribution a. Default: self.low.
        >>>         #     high_a (Tensor): upper bound of distribution a. Default: self.high.
        >>>         #     dist (str): the type of the distributions. Should be "Uniform" in this case.
        >>>         #     low_b (Tensor): the lower bound of distribution b.
        >>>         #     high_b (Tensor): the upper bound of distribution b.
        >>>         #     low_a (Tensor): the lower bound of distribution a. Default: self.low.
        >>>         #     high_a (Tensor): the upper bound of distribution a. Default: self.high.
        >>>
        >>>         # Example of kl_loss (cross_entropy is similar):
        >>>         # Examples of `kl_loss`. `cross_entropy` is similar.
        >>>         ans = self.u1.kl_loss('Uniform', low_b, high_b)
        >>>         ans = self.u1.kl_loss('Uniform', low_b, high_b, low_a, high_a)
        >>>         # Additional high and low must be passed in
        >>>         # Additional `high` and `low` must be passed in.
        >>>         ans = self.u2.kl_loss('Uniform', low_b, high_b, low_a, high_a)
        >>>
        >>>
        >>>         # sample
        >>>         # Examples of `sample`.
        >>>         # Args:
        >>>         #     shape (tuple): shape of the sample. Default: ()
        >>>         #     low (Tensor): lower bound of distribution. Default: self.low.
        >>>         #     high (Tensor): higher bound of distribution. Default: self.high.
        >>>         #     shape (tuple): the shape of the sample. Default: ()
        >>>         #     low (Tensor): the lower bound of the distribution. Default: self.low.
        >>>         #     high (Tensor): the upper bound of the distribution. Default: self.high.
        >>>         ans = self.u1.sample()
        >>>         ans = self.u1.sample((2,3))
        >>>         ans = self.u1.sample((2,3), low_b, high_b)
@@ -126,14 +127,17 @@ class Uniform(Distribution):
        valid_dtype = mstype.float_type
        check_type(dtype, valid_dtype, type(self).__name__)
        super(Uniform, self).__init__(seed, dtype, name, param)
        self.parameter_type = set_param_type({'low': low, 'high': high}, self.dtype)
        self.parameter_type = set_param_type(
            {'low': low, 'high': high}, self.dtype)
        if low is not None and high is not None:
            self._low = cast_to_tensor(low, self.parameter_type)
            self._high = cast_to_tensor(high, self.parameter_type)
            check_greater(self.low, self.high, "low value", "high value")
        else:
            self._low = low if low is None else cast_to_tensor(low, self.parameter_type)
            self._high = high if high is None else cast_to_tensor(high, self.parameter_type)
            self._low = low if low is None else cast_to_tensor(
                low, self.parameter_type)
            self._high = high if high is None else cast_to_tensor(
                high, self.parameter_type)

        self.default_parameters = [self.low, self.high]
        self.parameter_names = ['low', 'high']
@@ -168,14 +172,14 @@ class Uniform(Distribution):
    @property
    def low(self):
        """
        Return lower bound of the distribution.
        Return the lower bound of the distribution.
        """
        return self._low

    @property
    def high(self):
        """
        Return upper bound of the distribution.
        Return the upper bound of the distribution.
        """
        return self._high

@@ -215,7 +219,7 @@ class Uniform(Distribution):

    def _cross_entropy(self, dist, low_b, high_b, low=None, high=None):
        """
        Evaluate cross_entropy between Uniform distributoins.
        Evaluate cross entropy between Uniform distributions.

        Args:
            dist (str): The type of the distributions. Should be "Uniform" in this case.
@@ -271,12 +275,13 @@ class Uniform(Distribution):
        high_b = self.cast(high_b, self.parameter_type)
        low_a, high_a = self._check_param_type(low, high)
        kl = self.log(high_b - low_b) - self.log(high_a - low_a)
        comp = self.logicaland(self.lessequal(low_b, low_a), self.lessequal(high_a, high_b))
        comp = self.logicaland(self.lessequal(
            low_b, low_a), self.lessequal(high_a, high_b))
        return self.select(comp, kl, self.log(self.zeroslike(kl)))

    def _cdf(self, value, low=None, high=None):
        r"""
        cdf of Uniform distribution.
        The cumulative distribution function of Uniform distribution.

        Args:
            value (Tensor): The value to be evaluated.
@@ -310,7 +315,7 @@ class Uniform(Distribution):
            high (Tensor): The upper bound of the distribution. Default: self.high.

        Returns:
            Tensor, shape is shape + batch_shape.
            Tensor, with the shape being shape + batch_shape.
        """
        shape = self.checktuple(shape, 'shape')
        low, high = self._check_param_type(low, high)