diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index d17658cb47..a92c4b6cf4 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -1061,7 +1061,9 @@ class Cell(Cell_): def set_grad(self, requires_grad=True): """ - Sets the cell flag for gradient. + Sets the cell flag for gradient. In pynative mode, this parameter specifies whether the network requires + gradients. If True, the backward network needed to compute the gradients will be generated when the forward + network is executed. Args: requires_grad (bool): Specifies if the net need to grad, if it is @@ -1075,7 +1077,8 @@ class Cell(Cell_): Sets the cell to training mode. The cell itself and all children cells will be set to training mode. Layers that have different constructions - for training and predicting , such as `BatchNorm`, will distinguish between the branches by this attribute. + for training and predicting, such as `BatchNorm`, will distinguish between the branches by this attribute. If + set to True, the training branch will be executed, otherwise the other branch will be executed. Args: mode (bool): Specifies whether the model is training. Default: True. @@ -1214,6 +1217,9 @@ class GraphKernel(Cell): auto_prefix (bool): Recursively generate namespaces. Default: True. flags (dict) : Set graph flags. Default: None. + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: >>> class Relu(nn.GraphKernel): ... def __init__(self): @@ -1243,6 +1249,9 @@ class GraphCell(Cell): Args: graph (object): A compiled graph loaded from MindIR. + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + Examples: >>> import numpy as np >>> import mindspore.nn as nn diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index 1578194a72..496f2572e0 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -746,6 +746,9 @@ def get_activation(name): Returns: Function, the activation function. 
+ Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + Examples: >>> sigmoid = nn.get_activation('sigmoid') """ diff --git a/mindspore/nn/optim/ada_grad.py b/mindspore/nn/optim/ada_grad.py index 3b1253c32a..fa27efbcf6 100644 --- a/mindspore/nn/optim/ada_grad.py +++ b/mindspore/nn/optim/ada_grad.py @@ -95,7 +95,11 @@ class Adagrad(Optimizer): equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 0.001. update_slots (bool): If true, update accumulation. Default: True. - loss_scale (float): Value for the loss scale. It must be greater than 0.0. Default: 1.0. + loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value. + Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. weight_decay (Union[float, int]): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py index bb0f737ea9..de61a152cd 100755 --- a/mindspore/nn/optim/adam.py +++ b/mindspore/nn/optim/adam.py @@ -264,7 +264,11 @@ class Adam(Optimizer): If true, update the gradients using NAG. If false, update the gradients without using NAG. Default: False. weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0. - loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. + loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the + default value. 
Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. @@ -561,7 +565,11 @@ class AdamOffload(Optimizer): If true, update the gradients using NAG. If false, update the gradients without using NAG. Default: False. weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0. - loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. + loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the + default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index b6b235fb4f..3630d98cb5 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -144,7 +144,11 @@ class FTRL(Optimizer): l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. use_locking (bool): If true, use locks for updating operation. Default: False. - loss_scale (float): Value for the loss scale. It must be equal to or greater than 1.0. Default: 1.0. + loss_scale (float): Value for the loss scale. It must be greater than 0.0. 
In general, use the default value. + Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. weight_decay (Union[float, int]): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py index 0d89637a6d..8201fc5faa 100644 --- a/mindspore/nn/optim/lazyadam.py +++ b/mindspore/nn/optim/lazyadam.py @@ -183,8 +183,11 @@ class LazyAdam(Optimizer): If true, update the gradients using NAG. If false, update the gradients without using NAG. Default: False. weight_decay (Union[float, int]): Weight decay (L2 penalty). Default: 0.0. - loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. Default: - 1.0. + loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. In general, + use the default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` + in `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py index b1fe65b6a8..74f4af81c0 100755 --- a/mindspore/nn/optim/momentum.py +++ b/mindspore/nn/optim/momentum.py @@ -101,7 +101,11 @@ class Momentum(Optimizer): momentum (float): Hyperparameter of type float, means momentum for the moving average. It must be at least 0.0. weight_decay (int, float): Weight decay (L2 penalty). It must be equal to or greater than 0.0. 
Default: 0.0. - loss_scale (float): A floating point value for the loss scale. It must be greater than 0.0. Default: 1.0. + loss_scale (float): A floating point value for the loss scale. It must be greater than 0.0. In general, use the + default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. use_nesterov (bool): Enable Nesterov momentum. Default: False. Inputs: diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py index da9ab0800d..a3a1a53680 100755 --- a/mindspore/nn/optim/optimizer.py +++ b/mindspore/nn/optim/optimizer.py @@ -88,7 +88,11 @@ class Optimizer(Cell): It must be equal to or greater than 0. If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0. loss_scale (float): A floating point value for the loss scale. It must be greater than 0. If the - type of `loss_scale` input is int, it will be converted to float. Default: 1.0. + type of `loss_scale` input is int, it will be converted to float. In general, use the default value. Only + when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. Raises: TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule. 
diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py index 22d486fc79..1a18111102 100644 --- a/mindspore/nn/optim/proximal_ada_grad.py +++ b/mindspore/nn/optim/proximal_ada_grad.py @@ -105,7 +105,11 @@ class ProximalAdagrad(Optimizer): l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. use_locking (bool): If true, use locks for updating operation. Default: False. - loss_scale (float): Value for the loss scale. It must be greater than 0.0. Default: 1.0. + loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value. + Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. weight_decay (Union[float, int]): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index 96d07b44b9..f50beb14b9 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -129,7 +129,11 @@ class RMSProp(Optimizer): use_locking (bool): Whether to enable a lock to protect the variable and accumlation tensors from being updated. Default: False. centered (bool): If true, gradients are normalized by the estimated variance of the gradient. Default: False. - loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. + loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the + default value. 
Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. weight_decay (Union[float, int]): Weight decay (L2 penalty). Should be equal to or greater than 0. Default: 0.0. Inputs: diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py index 78d241a812..99131f31ef 100755 --- a/mindspore/nn/optim/sgd.py +++ b/mindspore/nn/optim/sgd.py @@ -94,8 +94,11 @@ class SGD(Optimizer): weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0. nesterov (bool): Enables the Nesterov momentum. If use nesterov, momentum must be positive, and dampening must equal to 0.0. Default: False. - loss_scale (float): A floating point value for the loss scale, which must be larger - than 0.0. Default: 1.0. + loss_scale (float): A floating point value for the loss scale, which must be larger than 0.0. In general, use + the default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in + `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in + `FixedLossScaleManager`. Refer to class :class:`mindspore.FixedLossScaleManager` for more details. + Default: 1.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. diff --git a/mindspore/train/loss_scale_manager.py b/mindspore/train/loss_scale_manager.py index 94aa096d39..cca118e865 100644 --- a/mindspore/train/loss_scale_manager.py +++ b/mindspore/train/loss_scale_manager.py @@ -39,17 +39,25 @@ class FixedLossScaleManager(LossScaleManager): Fixed loss-scale manager. Args: - loss_scale (float): Loss scale. Default: 128.0. - drop_overflow_update (bool): whether to execute optimizer if there is an overflow. 
Default: True. + loss_scale (float): Loss scale. Note that if `drop_overflow_update` is set to False, the value of `loss_scale` + in the optimizer that you use needs to be set to the same value as here. Default: 128.0. + drop_overflow_update (bool): Whether to execute the optimizer if there is an overflow. If True, the optimizer will + not be executed when overflow occurs. Default: True. Examples: - >>> from mindspore import Model, nn - >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager + >>> from mindspore import Model, nn, FixedLossScaleManager >>> >>> net = Net() + >>> #1) Drop the parameter update if there is an overflow >>> loss_scale_manager = FixedLossScaleManager() >>> optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9) >>> model = Model(net, loss_scale_manager=loss_scale_manager, optimizer=optim) + >>> + >>> #2) Execute parameter update even if overflow occurs + >>> loss_scale = 1024 + >>> loss_scale_manager = FixedLossScaleManager(loss_scale, False) + >>> optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9, loss_scale=loss_scale) + >>> model = Model(net, loss_scale_manager=loss_scale_manager, optimizer=optim) """ def __init__(self, loss_scale=128.0, drop_overflow_update=True): if loss_scale < 1: