From 994b16c4cc53c47f89f9ce98dfc4b39e1dd99669 Mon Sep 17 00:00:00 2001
From: yoonlee666
Date: Sat, 30 May 2020 16:22:03 +0800
Subject: [PATCH] adjust warmup_steps in AdamWeightDecayDynamicLR

---
 mindspore/nn/optim/adam.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 2138aed741..a256f0e0d8 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -325,9 +325,10 @@ class AdamWeightDecayDynamicLR(Optimizer):
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
             should be class mindspore.Parameter.
         decay_steps (int): The steps of the decay.
+        warmup_steps (int): The steps of the learning rate warm-up. Default: 0.
         learning_rate (float): A floating point value for the learning rate. Default: 0.001.
         end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001.
-        power (float): Power. Default: 10.0.
+        power (float): The power of the polynomial. Default: 10.0.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9.
             Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999.
@@ -353,6 +354,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
     def __init__(self,
                  params,
                  decay_steps,
+                 warmup_steps=0,
                  learning_rate=0.001,
                  end_learning_rate=0.0001,
                  power=10.0,
@@ -360,8 +362,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
                  beta2=0.999,
                  eps=1e-6,
                  weight_decay=0.0,
-                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name,
-                 warmup_steps=0):
+                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
         super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params)
         if self.is_group:
             raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.")
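
Below is a minimal usage sketch of the reworked signature, assuming the optimizer is exported from mindspore.nn in this version and that `net` stands in for an existing Cell. Because `warmup_steps` now sits ahead of `learning_rate` in the parameter list, callers that passed `learning_rate` or later arguments positionally should switch to keyword arguments.

    # Sketch only: `net` is a hypothetical network and the hyperparameter values
    # are illustrative, not taken from the patch.
    from mindspore import nn

    net = nn.Dense(16, 10)  # stand-in for a real model
    optimizer = nn.AdamWeightDecayDynamicLR(net.trainable_params(),
                                            decay_steps=10000,
                                            warmup_steps=1000,       # keyword added by this patch
                                            learning_rate=0.001,
                                            end_learning_rate=0.0001,
                                            power=10.0,
                                            weight_decay=0.01)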