@@ -325,9 +325,10 @@ class AdamWeightDecayDynamicLR(Optimizer):
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
             should be class mindspore.Parameter.
         decay_steps (int): The steps of the decay.
+        warmup_steps (int): The steps of lr warm up. Default: 0.
         learning_rate (float): A floating point value for the learning rate. Default: 0.001.
         end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001.
-        power (float): Power. Default: 10.0.
+        power (float): The power of the polynomial. Default: 10.0.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9.
             Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999.
@@ -353,6 +354,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
     def __init__(self,
                  params,
                  decay_steps,
+                 warmup_steps=0,
                  learning_rate=0.001,
                  end_learning_rate=0.0001,
                  power=10.0,
@@ -360,8 +362,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
                  beta2=0.999,
                  eps=1e-6,
                  weight_decay=0.0,
-                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name,
-                 warmup_steps=0):
+                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
         super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params)
         if self.is_group:
             raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.")