!10868 update the documentation of MaxPoolWithArgmax, BroadcastTo, GlobalBatchNorm and DistributedGradReducer operators.

From: @wangshuide2020 Reviewed-by: @liangchenghui,@wuxuejian Signed-off-by: @liangchenghui
5 years ago · 634526286a
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -575,7 +575,7 @@ class GlobalBatchNorm(_BatchNorm):
        >>>
        >>> device_id = int(os.environ["DEVICE_ID"])
        >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True,
        >>>                     device_id=int(device_id))
        ...                     device_id=int(device_id))
        >>> init()
        >>> context.reset_auto_parallel_context()
        >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -268,47 +268,45 @@ class DistributedGradReducer(Cell):
        >>> context.reset_auto_parallel_context()
        >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
        >>>
        >>>
        >>> class TrainingWrapper(nn.Cell):
        >>>     def __init__(self, network, optimizer, sens=1.0):
        >>>         super(TrainingWrapper, self).__init__(auto_prefix=False)
        >>>         self.network = network
        >>>         self.network.add_flags(defer_inline=True)
        >>>         self.weights = optimizer.parameters
        >>>         self.optimizer = optimizer
        >>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        >>>         self.sens = sens
        >>>         self.reducer_flag = False
        >>>         self.grad_reducer = None
        >>>         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        >>>         if self.parallel_mode in [ParallelMode.DATA_PARALLEL,
        >>>                                            ParallelMode.HYBRID_PARALLEL]:
        >>>             self.reducer_flag = True
        >>>         if self.reducer_flag:
        >>>             mean = _get_gradients_mean()
        >>>             degree = _get_device_num()
        >>>             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
        >>>
        >>>     def construct(self, *args):
        >>>         weights = self.weights
        >>>         loss = self.network(*args)
        >>>         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        >>>         grads = self.grad(self.network, weights)(*args, sens)
        >>>         if self.reducer_flag:
        >>>             # apply grad reducer on grads
        >>>             grads = self.grad_reducer(grads)
        >>>         return F.depend(loss, self.optimizer(grads))
        ...     def __init__(self, network, optimizer, sens=1.0):
        ...         super(TrainingWrapper, self).__init__(auto_prefix=False)
        ...         self.network = network
        ...         self.network.add_flags(defer_inline=True)
        ...         self.weights = optimizer.parameters
        ...         self.optimizer = optimizer
        ...         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        ...         self.sens = sens
        ...         self.reducer_flag = False
        ...         self.grad_reducer = None
        ...         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        ...         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
        ...             self.reducer_flag = True
        ...         if self.reducer_flag:
        ...             mean = _get_gradients_mean()
        ...             degree = _get_device_num()
        ...             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
        ...
        ...     def construct(self, *args):
        ...         weights = self.weights
        ...         loss = self.network(*args)
        ...         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        ...         grads = self.grad(self.network, weights)(*args, sens)
        ...         if self.reducer_flag:
        ...             # apply grad reducer on grads
        ...             grads = self.grad_reducer(grads)
        ...         return F.depend(loss, self.optimizer(grads))
        >>>
        >>> class Net(nn.Cell):
        >>>     def __init__(self, in_features, out_features):
        >>>         super(Net, self).__init__()
        >>>         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
        >>>                                 name='weight')
        >>>         self.matmul = P.MatMul()
        >>>
        >>>     def construct(self, x):
        >>>         output = self.matmul(x, self.weight)
        >>>         return output
        ...     def __init__(self, in_features, out_features):
        ...         super(Net, self).__init__()
        ...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
        ...                                 name='weight')
        ...         self.matmul = P.MatMul()
        ...
        ...     def construct(self, x):
        ...         output = self.matmul(x, self.weight)
        ...         return output
        >>>
        >>> size, in_features, out_features = 16, 16, 10
        >>> network = Net(in_features, out_features)
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -4105,12 +4105,12 @@ class BroadcastTo(PrimitiveWithInfer):
    When input shape is broadcast to target shape, it starts with the trailing dimensions.

    Raises:
        ValueError: Given a shape tuple, if it has several -1s; or if the -1 is in an invalid position
            such as one that does not have a opposing dimension in an input tensor; of if the target and
        ValueError: Given a shape tuple, if it has several -1; or if the -1 is in an invalid position
            such as one that does not have an opposing dimension in an input tensor; or if the target and
            input shapes are incompatible.

    Args:
        shape (tuple): The target shape to broadcast. Can be fully specified, or have '-1's in one position
        shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
            where it will be substituted by the input tensor's shape in that position, see example.

    Inputs:
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -1566,7 +1566,8 @@ class MaxPoolWithArgmax(_Pool):
        Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.

        - **output** (Tensor) -  Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
        - **mask** (Tensor) -  Max values' index represented by the mask.
          It has the same data type as `input`.
        - **mask** (Tensor) -  Max values' index represented by the mask. Data type is int32.

    Supported Platforms:
        ``Ascend``