@@ -61,7 +61,8 @@ class SGD(Optimizer):
         dampening (float): A floating point value of dampening for momentum. Default: 0.
         weight_decay (float): Weight decay (L2 penalty). Default: 0.
         nesterov (bool): Enables the Nesterov momentum. Default: False.
-        loss_scale (float): A floating point value for the loss scale. Default: 1.0.
+        loss_scale (float): A floating point value for the loss scale, which should be larger
+            than 0.0. Default: 1.0.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
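For context, a minimal end-to-end sketch of how these arguments fit together. This is only a sketch: it assumes MindSpore's public nn.Dense, nn.MSELoss, nn.SGD, ParameterTuple, and ops.GradOperation APIs; the toy network, the data, and every hyperparameter value are illustrative rather than part of this patch, and the by-hand gradient plumbing is normally wrapped in a TrainOneStepCell.

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    from mindspore import ops

    net = nn.Dense(4, 1)                # placeholder network with trainable parameters
    loss_fn = nn.MSELoss()

    opt = nn.SGD(net.trainable_params(),
                 learning_rate=0.1,
                 momentum=0.9,          # must be a non-negative float (validated in __init__)
                 dampening=0.0,         # must be a non-negative float
                 weight_decay=1e-4,     # L2 penalty
                 nesterov=True,
                 loss_scale=1.0)        # must be larger than 0.0, per the docstring change

    def forward(x, y):
        return loss_fn(net(x), y)

    x = ms.Tensor(np.random.randn(2, 4).astype(np.float32))
    y = ms.Tensor(np.random.randn(2, 1).astype(np.float32))

    # `grads` is a tuple[Tensor] with the same shapes as `params` --
    # the `gradients` input documented above.
    weights = ms.ParameterTuple(net.trainable_params())
    grad_fn = ops.GradOperation(get_by_list=True)(forward, weights)
    grads = grad_fn(x, y)
    opt(grads)                          # applies one SGD update step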
@@ -83,9 +84,18 @@ class SGD(Optimizer):
         super(SGD, self).__init__(learning_rate, params, weight_decay, loss_scale)
 
         if not isinstance(momentum, float):
             raise TypeError("momentum should be float number!")
 
         if isinstance(momentum, float) and momentum < 0.0:
             raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
 
+        if isinstance(dampening, int):
+            dampening = float(dampening)
+        if not isinstance(dampening, float):
+            raise TypeError("dampening should be float number")
+        if dampening < 0.0:
+            raise ValueError("dampening should be at least 0.0, but got dampening {}".format(dampening))
         self.dampening = dampening
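The order of the dampening checks matters: the int-to-float coercion has to run before the isinstance(dampening, float) check, otherwise an integer argument such as dampening=0 would hit the TypeError and the coercion would be dead code. A standalone sketch of the same coerce-then-validate pattern; the helper name _check_non_negative_float is made up for illustration and is not part of this patch:

    def _check_non_negative_float(value, name):
        """Coerce int to float, then validate type and range (illustrative only)."""
        if isinstance(value, int):        # coerce first, so integer inputs are accepted
            value = float(value)
        if not isinstance(value, float):  # then reject anything that is still not a float
            raise TypeError("{} should be float number".format(name))
        if value < 0.0:                   # finally enforce the documented lower bound
            raise ValueError("{} should be at least 0.0, but got {}".format(name, value))
        return value

    dampening = _check_non_negative_float(0, "dampening")   # int 0 is coerced to 0.0
    # _check_non_negative_float(-0.1, "dampening")          # would raise ValueError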