|
|
|
@@ -2255,14 +2255,17 @@ class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer): |
|
|
|
r""" |
|
|
|
Gets the softmax cross-entropy value between logits and labels with one-hot encoding. |
|
|
|
|
|
|
|
Note: |
|
|
|
Let the input logits be `X`, the input label be `Y`, and the output be `loss`.
|
|
|
|
|
|
|
|
|
|
The updating formulas of the SoftmaxCrossEntropyWithLogits algorithm are as follows,
|
|
|
|
|
|
|
.. math::
|
|
|
\begin{array}{ll} \\
    p_{ij} = softmax(X_{ij}) = \frac{\exp(X_{ij})}{\sum_{k = 0}^{N-1}\exp(X_{ik})} \\
    loss_{i} = -\sum_j{Y_{ij} * \ln(p_{ij})}
\end{array}
|
|
|
|
|
|
|
where :math:`X` represents `logits`. |
|
|
|
:math:`Y` represents `label`. |
|
|
|
:math:`loss` represents `output`. |
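As an illustration of the two formulas above, here is a minimal NumPy sketch; the function name and the row-max shift for numerical stability are illustrative and not part of this primitive's API:

import numpy as np

def softmax_cross_entropy(logits, labels):
    # p_ij = softmax(X_ij); subtract the row max for numerical stability
    shifted = logits - logits.max(axis=1, keepdims=True)
    p = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    # loss_i = -sum_j Y_ij * ln(p_ij), one scalar per sample
    return -(labels * np.log(p)).sum(axis=1)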
|
|
|
|
|
|
|
Inputs: |
|
|
|
- **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32. |
|
|
|
@@ -2450,12 +2453,15 @@ class SmoothL1Loss(PrimitiveWithInfer): |
|
|
|
SmoothL1Loss is a loss function similar to MSELoss but less sensitive to outliers, as described in

`Fast R-CNN <https://arxiv.org/abs/1504.08083>`_ by Ross Girshick.
|
|
|
|
|
|
|
Note: |
|
|
|
Let the input prediction be `X`, the input target be `Y`, and the output be `loss`.

The updating formula of the SmoothL1Loss algorithm is as follows,
|
|
|
|
|
|
|
.. math:: |
|
|
|
\text{SmoothL1Loss} = \begin{cases} \frac{0.5 x^{2}}{\text{beta}}, & \text{if } \left|x\right| < \text{beta} \cr
\left|x\right| - 0.5\, \text{beta}, & \text{otherwise} \end{cases}
|
|
|
|
|
|
|
|
|
|
where :math:`x` is the element-wise difference between :math:`X` and :math:`Y`.

:math:`X` represents `prediction`.

:math:`Y` represents `target`.

:math:`loss` represents `output`.
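A minimal NumPy sketch of the piecewise definition above (illustrative only; the default `beta=1.0` here is an assumption for the sketch, not a statement about this primitive's default):

import numpy as np

def smooth_l1_loss(prediction, target, beta=1.0):
    x = np.abs(prediction - target)       # |x| in the formula above
    return np.where(x < beta,
                    0.5 * x ** 2 / beta,  # quadratic branch near zero
                    x - 0.5 * beta)       # linear branch for outliers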
|
|
|
|
|
|
|
Args: |
|
|
|
beta (float): A parameter used to control the point where the function will change from |
|
|
|
@@ -2739,28 +2745,25 @@ class SGD(PrimitiveWithCheck): |
|
|
|
|
|
|
|
|
|
|
|
class ApplyRMSProp(PrimitiveWithInfer): |
|
|
|
""" |
|
|
|
r""" |
|
|
|
Optimizer that implements the Root Mean Square Propagation (RMSProp) algorithm.

Please refer to the usage in the source code of `nn.RMSProp`.
|
|
|
|
|
|
|
Note: |
|
|
|
Update `var` according to the RMSProp algorithm. |
|
|
|
|
|
|
|
|
|
|
The updating formulas of the ApplyRMSProp algorithm are as follows,
|
|
|
|
|
|
|
.. math::
|
|
|
\begin{array}{ll} \\
    s_{t} = \rho s_{t-1} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
    m_{t} = \beta m_{t-1} + \frac{\eta}{\sqrt{s_{t} + \epsilon}} \nabla Q_{i}(w) \\
    w = w - m_{t}
\end{array}
|
|
|
|
|
|
|
where :math:`w` represents `var`, which will be updated.

:math:`s_{t}` represents `mean_square`, :math:`s_{t-1}` is the last moment of :math:`s_{t}`,

:math:`m_{t}` represents `moment`, :math:`m_{t-1}` is the last moment of :math:`m_{t}`.

:math:`\rho` represents `decay`. :math:`\beta` is the momentum term, which represents `momentum`.

:math:`\epsilon` is a smoothing term to avoid division by zero, which represents `epsilon`.

:math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
|
|
|
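A NumPy sketch of the three update equations above; the function and argument names mirror the docstring symbols and are illustrative, not the primitive's actual call signature:

import numpy as np

def rmsprop_step(w, s, m, grad, lr, decay, momentum, eps):
    s = decay * s + (1 - decay) * grad ** 2          # s_t
    m = momentum * m + lr / np.sqrt(s + eps) * grad  # m_t
    w = w - m                                        # w = w - m_t
    return w, s, m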
|
|
|
|
|
|
|
Args: |
|
|
|
use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
|
|
|
@@ -2838,32 +2841,27 @@ class ApplyRMSProp(PrimitiveWithInfer): |
|
|
|
|
|
|
|
|
|
|
|
class ApplyCenteredRMSProp(PrimitiveWithInfer): |
|
|
|
""" |
|
|
|
r""" |
|
|
|
Optimizer that implements the centered RMSProp algorithm. |
|
|
|
Please refer to the usage in the source code of `nn.RMSProp`.
|
|
|
|
|
|
|
Note: |
|
|
|
Update `var` according to the centered RMSProp algorithm. |
|
|
|
|
|
|
|
|
|
|
The updating formulas of the ApplyCenteredRMSProp algorithm are as follows,
|
|
|
|
|
|
|
.. math::
|
|
|
\begin{array}{ll} \\
    g_{t} = \rho g_{t-1} + (1 - \rho)\nabla Q_{i}(w) \\
    s_{t} = \rho s_{t-1} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
    m_{t} = \beta m_{t-1} + \frac{\eta}{\sqrt{s_{t} - g_{t}^2 + \epsilon}} \nabla Q_{i}(w) \\
    w = w - m_{t}
\end{array}
|
|
|
|
|
|
|
where :math:`w` represents `var`, which will be updated.

:math:`g_{t}` represents `mean_gradient`, :math:`g_{t-1}` is the last moment of :math:`g_{t}`.

:math:`s_{t}` represents `mean_square`, :math:`s_{t-1}` is the last moment of :math:`s_{t}`,

:math:`m_{t}` represents `moment`, :math:`m_{t-1}` is the last moment of :math:`m_{t}`.

:math:`\rho` represents `decay`. :math:`\beta` is the momentum term, which represents `momentum`.

:math:`\epsilon` is a smoothing term to avoid division by zero, which represents `epsilon`.

:math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
|
|
|
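A NumPy sketch of the four update equations above; as with the previous sketch, the names mirror the docstring symbols and are illustrative, not the primitive's actual call signature:

import numpy as np

def centered_rmsprop_step(w, g, s, m, grad, lr, decay, momentum, eps):
    g = decay * g + (1 - decay) * grad                        # g_t, mean gradient
    s = decay * s + (1 - decay) * grad ** 2                   # s_t, mean square
    m = momentum * m + lr / np.sqrt(s - g ** 2 + eps) * grad  # m_t
    w = w - m                                                 # w = w - m_t
    return w, g, s, m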
|
|
|
|
|
|
|
Args: |
|
|
|
use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
|
|
|
@@ -3020,7 +3018,7 @@ class L2Normalize(PrimitiveWithInfer): |
|
|
|
|
|
|
|
Args: |
|
|
|
axis (Union[list(int), tuple(int), int]): The starting axis for the input to apply the L2 normalization. |
|
|
|
Default: 0. |
|
|
|
|
|
|
epsilon (float): A small value added for numerical stability. Default: 1e-4. |
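For reference, a NumPy sketch of one common formulation of L2 normalization using these two arguments; the `max(sum(x^2), epsilon)` form is an assumption for illustration, not quoted from this docstring:

import numpy as np

def l2_normalize(x, axis=0, epsilon=1e-4):
    # Divide by the L2 norm along `axis`; `epsilon` guards against
    # division by zero for all-zero slices.
    squared_sum = np.sum(x ** 2, axis=axis, keepdims=True)
    return x / np.sqrt(np.maximum(squared_sum, epsilon))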
|
|
|
|
|
|
|
Inputs: |
|
|
|
@@ -4865,22 +4863,24 @@ class KLDivLoss(PrimitiveWithInfer): |
|
|
|
r""" |
|
|
|
Computes the Kullback-Leibler divergence between the target and the output. |
|
|
|
|
|
|
|
Note: |
|
|
|
Let the input be :math:`x`, the input label be :math:`y`, and the output be :math:`\ell(x, y)`.

The updating formulas of the KLDivLoss algorithm are as follows,
|
|
|
|
|
|
|
.. math:: |
|
|
|
L = \{l_1,\dots,l_N\}^\top, \quad |
|
|
|
l_n = y_n \cdot (\log y_n - x_n) |
|
|
|
|
|
|
|
|
|
|
Then, |
|
|
|
|
|
|
|
.. math:: |
|
|
|
\ell(x, y) = \begin{cases} |
|
|
|
L, & \text{if reduction} = \text{'none';}\\ |
|
|
|
\operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ |
|
|
|
\operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} |
|
|
|
\end{cases} |
|
|
|
|
|
|
|
|
|
|
where :math:`x` represents `input`. |
|
|
|
:math:`y` represents `label`. |
|
|
|
:math:`\ell(x, y)` represents `output`. |
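A NumPy sketch of the elementwise term and the three reductions above (illustrative only; it assumes strictly positive labels so that :math:`\log y_n` is defined):

import numpy as np

def kl_div_loss(x, y, reduction='mean'):
    L = y * (np.log(y) - x)   # l_n = y_n * (log(y_n) - x_n)
    if reduction == 'none':
        return L
    if reduction == 'mean':
        return L.mean()
    return L.sum()            # reduction == 'sum'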
|
|
|
|
|
|
|
Args: |
|
|
|
reduction (str): Specifies the reduction to be applied to the output. |
|
|
|
|