@@ -162,13 +162,14 @@ class Adam(Optimizer):
             in the value of 'order_params' but not in any group will use default learning rate and default weight
             decay.
-        learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
-            Iterable or a Tensor and the dims of the Tensor is 1,
-            use dynamic learning rate, then the i-th step will
-            take the i-th value as the learning rate.
-            When the learning_rate is float or learning_rate is a Tensor
-            but the dims of the Tensor is 0, use fixed learning rate.
-            Other cases are not supported. Default: 1e-3.
+        learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
+            Iterable or a Tensor and the dims of the Tensor is 1,
+            use dynamic learning rate, then the i-th step will
+            take the i-th value as the learning rate.
+            When the learning_rate is float or learning_rate is a
+            Tensor but the dims of the Tensor is 0, use fixed learning
+            rate. Other cases are not supported. It should be equal to
+            or greater than 0. Default: 1e-3.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default:
             0.9.
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default:
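As context for the widened `learning_rate` type above, here is a minimal sketch of the two supported modes, assuming a trivial `nn.Dense` network stands in for the model; per the docstring, a scalar gives a fixed rate while a 1-dim Tensor supplies one value per step:

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

net = nn.Dense(16, 8)  # any Cell with trainable parameters

# Fixed rate: a non-negative float (or a 0-dim Tensor) is used at every step.
fixed = nn.Adam(net.trainable_params(), learning_rate=1e-3)

# Dynamic rate: a 1-dim Tensor of per-step values; step i reads element i.
schedule = Tensor(np.linspace(1e-3, 1e-5, num=1000).astype(np.float32))
dynamic = nn.Adam(net.trainable_params(), learning_rate=schedule)
```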
| @@ -181,7 +182,7 @@ class Adam(Optimizer): | |||
| use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. | |||
| If True, updates the gradients using NAG. | |||
| If False, updates the gradients without using NAG. Default: False. | |||
| weight_decay (float): Weight decay (L2 penalty). Default: 0.0. | |||
| weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. | |||
| loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. | |||
| Inputs: | |||
| @@ -143,10 +143,12 @@ class Lamb(Optimizer): | |||
| params (list[Parameter]): A list of parameter, which will be updated. The element in `params` | |||
| should be class mindspore.Parameter. | |||
| decay_steps (int): The steps of the lr decay. Should be equal to or greater than 1. | |||
| warmup_steps (int): The steps of lr warm up. Default: 0. | |||
| start_learning_rate (float): A floating point value for the learning rate. Default: 0.1. | |||
| end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001. | |||
| power (float): The power of the polynomial. Default: 1.0. | |||
| warmup_steps (int): The steps of lr warm up. Should be equal to or greater than 0. Default: 0. | |||
| start_learning_rate (float): A floating point value for the learning rate. Should be equal to | |||
| or greater than 0. Default: 0.1. | |||
| end_learning_rate (float): A floating point value for the end learning rate. Should be equal to | |||
| or greater than 0. Default: 0.0001. | |||
| power (float): The power of the polynomial. It must be positive. Default: 1.0. | |||
| beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9. | |||
| Should be in range (0.0, 1.0). | |||
| beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999. | |||
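The four schedule arguments constrained above combine into a warmup-then-polynomial-decay learning rate. A hedged sketch of how they are passed (the decay expression in the comment is an assumption based on standard polynomial decay; the exact formula is not shown in this diff):

```python
import mindspore.nn as nn

net = nn.Dense(16, 8)

# Assumed schedule for step t after warmup (standard polynomial decay):
#   lr(t) = (start - end) * (1 - t / decay_steps) ** power + end
# warmup_steps > 0 ramps the rate up before the decay begins.
opt = nn.Lamb(net.trainable_params(),
              decay_steps=1000,            # must be >= 1
              warmup_steps=100,            # >= 0 after this change
              start_learning_rate=0.1,     # >= 0
              end_learning_rate=0.0001,    # >= 0
              power=1.0)                   # must be positive
```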
| @@ -59,13 +59,13 @@ class LARS(Optimizer): | |||
| optimizer (Optimizer): MindSpore optimizer for which to wrap and modify gradients. | |||
| epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05. | |||
| hyperpara (float): Trust coefficient for calculating the local learning rate. Default: 0.001. | |||
| weight_decay (float): Weight decay (L2 penalty). Default: 0.0. | |||
| weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. | |||
| use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: False. | |||
| decay_filter (Function): A function to determine whether apply weight decay on parameters. Default: | |||
| lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. | |||
| lars_filter (Function): A function to determine whether apply lars algorithm. Default: | |||
| lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name. | |||
| loss_scale (float): A floating point value for the loss scale. Default: 1.0. | |||
| loss_scale (float): A floating point value for the loss scale. It should be greater than 0. Default: 1.0. | |||
| Inputs: | |||
| - **gradients** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is | |||
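For reference, a minimal sketch of wrapping a base optimizer with LARS under the tightened constraints (the `nn.Momentum` base optimizer here is only an illustrative choice):

```python
import mindspore.nn as nn

net = nn.Dense(16, 8)
base = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)

# weight_decay must be >= 0 and loss_scale strictly > 0 per the checks above.
opt = nn.LARS(base, epsilon=1e-05, hyperpara=0.001,
              weight_decay=0.0, use_clip=False, loss_scale=1.0)
```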
| @@ -73,10 +73,11 @@ class SGD(Optimizer): | |||
| take the i-th value as the learning rate. | |||
| When the learning_rate is float or learning_rate is a Tensor | |||
| but the dims of the Tensor is 0, use fixed learning rate. | |||
| Other cases are not supported. Default: 0.1. | |||
| momentum (float): A floating point value the momentum. Default: 0.0. | |||
| dampening (float): A floating point value of dampening for momentum. Default: 0.0. | |||
| weight_decay (float): Weight decay (L2 penalty). Default: 0.0. | |||
| Other cases are not supported. It should be equal to or | |||
| greater than 0. Default: 0.1. | |||
| momentum (float): A floating point value the momentum. should be at least 0.0. Default: 0.0. | |||
| dampening (float): A floating point value of dampening for momentum. should be at least 0.0. Default: 0.0. | |||
| weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. | |||
| nesterov (bool): Enables the Nesterov momentum. Default: False. | |||
| loss_scale (float): A floating point value for the loss scale, which should be larger | |||
| than 0.0. Default: 1.0. | |||
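A quick sketch of an SGD instantiation that satisfies all of the tightened ranges (the values themselves are illustrative):

```python
import mindspore.nn as nn

net = nn.Dense(16, 8)

# momentum, dampening and weight_decay must all be >= 0.0;
# loss_scale must stay strictly greater than 0.0.
opt = nn.SGD(net.trainable_params(), learning_rate=0.1, momentum=0.9,
             dampening=0.0, weight_decay=1e-4, nesterov=True, loss_scale=1.0)
```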
| @@ -3159,7 +3159,7 @@ class SparseApplyFtrl(PrimitiveWithInfer): | |||
| validator.check_value_type("l1", l1, [float], self.name) | |||
| validator.check_value_type("l2", l2, [float], self.name) | |||
| validator.check_value_type("lr_power", lr_power, [float], self.name) | |||
| self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_LEFT, self.name) | |||
| self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_NEITHER, self.name) | |||
| self.l1 = validator.check_number("l1", l1, 0.0, Rel.GE, self.name) | |||
| self.l2 = validator.check_number("l2", l2, 0.0, Rel.GE, self.name) | |||
| self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name) | |||
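The switch from `Rel.INC_LEFT` to `Rel.INC_NEITHER` narrows the accepted interval for `lr` from `[0.0, inf)` to `(0.0, inf)`, so a zero learning rate is now rejected at construction time. A small sketch of the visible effect, assuming the op is imported via `mindspore.ops.operations`:

```python
from mindspore.ops import operations as P

# Accepted: lr > 0, l1 >= 0, l2 >= 0, lr_power <= 0 per the checks above.
ftrl = P.SparseApplyFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)

# Rejected after this change: lr == 0.0 fails the (0.0, inf) range check.
# P.SparseApplyFtrl(lr=0.0, l1=0.0, l2=0.0, lr_power=-0.5)  # range-check error
```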
| @@ -3350,7 +3350,7 @@ class CTCLoss(PrimitiveWithInfer): | |||
| """ | |||
| @prim_attr_register | |||
| def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False, | |||
| def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True, | |||
| ignore_longer_outputs_than_inputs=False): | |||
| self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"], | |||
| outputs=["loss", "gradient"]) | |||