From abf3305ad9477e9ea68b120d6e6b2b85814fc313 Mon Sep 17 00:00:00 2001
From: liuhe
Date: Mon, 29 Mar 2021 16:01:20 +0800
Subject: [PATCH] update document of BroadcastTo, GatherD, UnsortedSegmentMax, etc.

---
 mindspore/ops/operations/array_ops.py |  23 ++---
 mindspore/ops/operations/math_ops.py  |   9 +-
 mindspore/ops/operations/nn_ops.py    | 120 +++++++++++++-------------
 3 files changed, 77 insertions(+), 75 deletions(-)

diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index a26c0833de..82f874835c 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -2069,6 +2069,10 @@ class UnsortedSegmentMin(PrimitiveWithCheck):
     """
     Computes the minimum of a tensor along segments.
 
+    Note:
+        If the segment_id i is absent in the segment_ids, then output[i] will be filled with
+        the maximum value of the input_x's type.
+
     Inputs:
         - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`.
           The data type must be float16, float32 or int32.
@@ -2076,10 +2080,6 @@ class UnsortedSegmentMin(PrimitiveWithCheck):
           The data type must be int32.
         - **num_segments** (int) - The value specifies the number of distinct `segment_ids`.
 
-    Note:
-        If the segment_id i is absent in the segment_ids, then output[i] will be filled with
-        the maximum value of the input_x's type.
-
     Outputs:
         Tensor, set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`.
 
@@ -2128,6 +2128,10 @@ class UnsortedSegmentMax(PrimitiveWithCheck):
     """
     Computes the maximum along segments of a tensor.
 
+    Note:
+        If the segment_id i is absent in the segment_ids, then output[i] will be filled with
+        the minimum value of the input_x's type.
+
     Inputs:
         - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`.
          The data type must be float16, float32 or int32.
@@ -2135,10 +2139,6 @@ class UnsortedSegmentMax(PrimitiveWithCheck):
          The data type must be int32.
        - **num_segments** (int) - The value specifies the number of distinct `segment_ids`.
 
-    Note:
-        If the segment_id i is absent in the segment_ids, then output[i] will be filled with
-        the minimum value of the input_x's type.
-
     Outputs:
         Tensor, set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`.
 
@@ -4619,8 +4619,8 @@ class BroadcastTo(PrimitiveWithInfer):
     Raises:
         TypeError: If `shape` is not a tuple.
-        ValueError: if the target and input shapes are incompatible, or if a -1 in the
-            target shape is in an invalid location.
+        ValueError: if the target and input shapes are incompatible, or if a -1 in the target shape is in an invalid
+            location.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -5150,6 +5150,9 @@ class GatherD(PrimitiveWithInfer):
     Gathers values along an axis specified by dim.
 
     For a 3-D tensor, the output is:
+
+    .. code-block::
+
         output[i][j][k] = x[index[i][j][k]][j][k]  # if dim == 0
 
         output[i][j][k] = x[i][index[i][j][k]][k]  # if dim == 1
 
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 6ec8c5ea8f..9f4395ac75 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -3864,11 +3864,10 @@ class Sign(PrimitiveWithInfer):
     r"""
     Performs sign on the tensor element-wise.
 
-    Note:
-        .. math::
-            sign(x) = \begin{cases} -1, &if\ x < 0 \cr
-            0, &if\ x = 0 \cr
-            1, &if\ x > 0\end{cases}
+    .. math::
+        sign(x) = \begin{cases} -1, &if\ x < 0 \cr
+        0, &if\ x = 0 \cr
+        1, &if\ x > 0\end{cases}
 
     Inputs:
         - **input_x** (Tensor) - The input tensor.
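Editor's note: as a reading aid for the GatherD hunk above, here is a minimal NumPy sketch of the gather-along-dim rule that the added ``.. code-block::`` describes. It is illustrative only; ``gather_d_reference`` is a hypothetical helper and does not call the MindSpore primitive.

.. code-block:: python

    import numpy as np

    def gather_d_reference(x, dim, index):
        """output[pos] = x[pos with pos[dim] replaced by index[pos]], for any rank."""
        out = np.empty(index.shape, dtype=x.dtype)
        for pos in np.ndindex(index.shape):
            src = list(pos)
            src[dim] = index[pos]        # swap the dim-th coordinate for the gathered index
            out[pos] = x[tuple(src)]
        return out

    x = np.array([[1, 2], [3, 4]])
    index = np.array([[0, 0], [1, 0]])
    print(gather_d_reference(x, 1, index))   # [[1 1]
                                             #  [4 3]]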
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index b836df973b..aa6141780a 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2255,14 +2255,17 @@ class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
     r"""
     Gets the softmax cross-entropy value between logits and labels with one-hot encoding.
 
-    Note:
-        Sets input logits as `X`, input label as `Y`, output as `loss`. Then,
-
-        .. math::
-            p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)}
+    The loss of the SoftmaxCrossEntropyWithLogits algorithm is computed as follows,
 
-        .. math::
+    .. math::
+        \begin{array}{ll} \\
+            p_{ij} = softmax(X_{ij}) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)} \\
             loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})}
+        \end{array}
+
+    where :math:`X` represents `logits`,
+    :math:`Y` represents `label`,
+    and :math:`loss` represents `output`.
 
     Inputs:
         - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
@@ -2450,12 +2453,15 @@ class SmoothL1Loss(PrimitiveWithInfer):
     SmoothL1Loss is a Loss similar to MSELoss but less sensitive to outliers as described in the
     `Fast R-CNN `_ by Ross Girshick.
 
-    Note:
-        Sets input prediction as `X`, input target as `Y`, output as `loss`. Then,
+    The loss of the SmoothL1Loss algorithm is computed as follows,
 
-        .. math::
-            \text{SmoothL1Loss} = \begin{cases} \frac{0.5 x^{2}}{\text{beta}}, &if \left |x \right | < \text{beta} \cr
-            \left |x \right|-0.5 \text{beta}, &\text{otherwise}\end{cases}
+    .. math::
+        \text{SmoothL1Loss} = \begin{cases} \frac{0.5 x^{2}}{\text{beta}}, &if \left |x \right | < \text{beta} \cr
+        \left |x \right|-0.5 \text{beta}, &\text{otherwise}\end{cases}
+
+    where :math:`X` represents `prediction`,
+    :math:`Y` represents `target`,
+    and :math:`loss` represents `output`.
 
     Args:
         beta (float): A parameter used to control the point where the function will change from
@@ -2739,28 +2745,25 @@ class SGD(PrimitiveWithCheck):
 class ApplyRMSProp(PrimitiveWithInfer):
-    """
+    r"""
     Optimizer that implements the Root Mean Square prop(RMSProp) algorithm.
     Please refer to the usage in source code of `nn.RMSProp`.
 
-    Note:
-        Update `var` according to the RMSProp algorithm.
-
-        .. math::
-            s_{t} = \\rho s_{t-1} + (1 - \\rho)(\\nabla Q_{i}(w))^2
-
-        .. math::
-            m_{t} = \\beta m_{t-1} + \\frac{\\eta} {\\sqrt{s_{t} + \\epsilon}} \\nabla Q_{i}(w)
-
-        .. math::
+    The updating formulas of the ApplyRMSProp algorithm are as follows,
+
+    .. math::
+        \begin{array}{ll} \\
+            s_{t} = \rho s_{t-1} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
+            m_{t} = \beta m_{t-1} + \frac{\eta} {\sqrt{s_{t} + \epsilon}} \nabla Q_{i}(w) \\
             w = w - m_{t}
+        \end{array}
 
     where :math:`w` represents `var`, which will be updated.
     :math:`s_{t}` represents `mean_square`, :math:`s_{t-1}` is the last moment of :math:`s_{t}`,
     :math:`m_{t}` represents `moment`, :math:`m_{t-1}` is the last moment of :math:`m_{t}`.
     :math:`\rho` represents `decay`.
    :math:`\beta` is the momentum term and represents `momentum`.
    :math:`\epsilon` is a smoothing term to avoid division by zero and represents `epsilon`.
    :math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
 
     Args:
         use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
@@ -2838,32 +2841,27 @@
 class ApplyCenteredRMSProp(PrimitiveWithInfer):
-    """
+    r"""
     Optimizer that implements the centered RMSProp algorithm.
     Please refer to the usage in source code of `nn.RMSProp`.
 
-    Note:
-        Update `var` according to the centered RMSProp algorithm.
-
-        .. math::
-            g_{t} = \\rho g_{t-1} + (1 - \\rho)\\nabla Q_{i}(w)
+    The updating formulas of the ApplyCenteredRMSProp algorithm are as follows,
 
-        .. math::
-            s_{t} = \\rho s_{t-1} + (1 - \\rho)(\\nabla Q_{i}(w))^2
-
-        .. math::
-            m_{t} = \\beta m_{t-1} + \\frac{\\eta} {\\sqrt{s_{t} - g_{t}^2 + \\epsilon}} \\nabla Q_{i}(w)
-
-        .. math::
+    .. math::
+        \begin{array}{ll} \\
+            g_{t} = \rho g_{t-1} + (1 - \rho)\nabla Q_{i}(w) \\
+            s_{t} = \rho s_{t-1} + (1 - \rho)(\nabla Q_{i}(w))^2 \\
+            m_{t} = \beta m_{t-1} + \frac{\eta} {\sqrt{s_{t} - g_{t}^2 + \epsilon}} \nabla Q_{i}(w) \\
             w = w - m_{t}
+        \end{array}
 
     where :math:`w` represents `var`, which will be updated.
     :math:`g_{t}` represents `mean_gradient`, :math:`g_{t-1}` is the last moment of :math:`g_{t}`.
     :math:`s_{t}` represents `mean_square`, :math:`s_{t-1}` is the last moment of :math:`s_{t}`,
     :math:`m_{t}` represents `moment`, :math:`m_{t-1}` is the last moment of :math:`m_{t}`.
     :math:`\rho` represents `decay`. :math:`\beta` is the momentum term and represents `momentum`.
     :math:`\epsilon` is a smoothing term to avoid division by zero and represents `epsilon`.
     :math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
 
     Args:
         use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
@@ -3020,7 +3018,7 @@ class L2Normalize(PrimitiveWithInfer):
     Args:
         axis (Union[list(int), tuple(int), int]): The starting axis for the input to apply the L2 normalization.
-                                                  Default: 0.
+            Default: 0.
         epsilon (float): A small value added for numerical stability. Default: 1e-4.
 
     Inputs:
@@ -4865,22 +4863,24 @@ class KLDivLoss(PrimitiveWithInfer):
     r"""
     Computes the Kullback-Leibler divergence between the target and the output.
 
-    Note:
-        Sets input as :math:`x`, input label as :math:`y`, output as :math:`\ell(x, y)`.
-        Let,
+    The loss of the KLDivLoss algorithm is computed as follows,
 
-        .. math::
-            L = \{l_1,\dots,l_N\}^\top, \quad
-            l_n = y_n \cdot (\log y_n - x_n)
+    .. math::
+        L = \{l_1,\dots,l_N\}^\top, \quad
+        l_n = y_n \cdot (\log y_n - x_n)
 
     Then,
 
-        ..
 math::
-            \ell(x, y) = \begin{cases}
-            L, & \text{if reduction} = \text{'none';}\\
-            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
-            \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
-            \end{cases}
+    .. math::
+        \ell(x, y) = \begin{cases}
+        L, & \text{if reduction} = \text{'none';}\\
+        \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
+        \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
+        \end{cases}
+
+    where :math:`x` represents `input`,
+    :math:`y` represents `label`,
+    and :math:`\ell(x, y)` represents `output`.
 
     Args:
         reduction (str): Specifies the reduction to be applied to the output.
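Editor's note: the nn_ops.py hunks above mostly restate update and loss formulas, so two small NumPy sketches may help readers check them. Both are illustrative only and make no MindSpore API calls; the function names and default hyperparameters are assumptions, not part of the patch.

First, a minimal sketch of one step of the centered RMSProp update written in the ApplyCenteredRMSProp docstring:

.. code-block:: python

    import numpy as np

    def centered_rms_prop_step(w, mean_grad, mean_square, moment, grad,
                               lr=0.01, decay=0.9, momentum=0.9, epsilon=1e-10):
        """One update following g_t, s_t, m_t, w from the docstring formulas."""
        mean_grad = decay * mean_grad + (1 - decay) * grad
        mean_square = decay * mean_square + (1 - decay) * grad ** 2
        moment = momentum * moment + lr * grad / np.sqrt(mean_square - mean_grad ** 2 + epsilon)
        w = w - moment
        return w, mean_grad, mean_square, moment

    w = np.array([1.0, 2.0])
    state = (np.zeros(2), np.zeros(2), np.zeros(2))        # mean_grad, mean_square, moment
    w, *state = centered_rms_prop_step(w, *state, grad=np.array([0.1, 0.2]))
    print(w)

Second, a sketch of the SmoothL1Loss piecewise definition and the KLDivLoss formula with its three reduction modes, as quoted in the corresponding hunks:

.. code-block:: python

    import numpy as np

    def smooth_l1_reference(prediction, target, beta=1.0):
        """0.5*x^2/beta where |x| < beta, otherwise |x| - 0.5*beta, with x = prediction - target."""
        x = prediction - target
        return np.where(np.abs(x) < beta,
                        0.5 * x ** 2 / beta,          # quadratic branch
                        np.abs(x) - 0.5 * beta)       # linear branch

    def kl_div_reference(x, y, reduction="mean"):
        """l_n = y_n * (log(y_n) - x_n), then 'none', 'sum' or 'mean' reduction."""
        loss = y * (np.log(y) - x)
        if reduction == "none":
            return loss
        return loss.sum() if reduction == "sum" else loss.mean()

    print(smooth_l1_reference(np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 2.0])))   # [0.  0.  0.5]
    print(kl_div_reference(np.log([0.2, 0.7, 0.1]), np.array([0.2, 0.7, 0.1]), "sum"))  # ~0.0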