|
|
|
@@ -115,17 +115,12 @@ if __name__ == '__main__': |
|
|
|
step_size = dataset.get_dataset_size() |
|
|
|
|
|
|
|
loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) |
|
|
|
if config.lr_decay_mode == 'cosine': |
|
|
|
lr = Tensor(warmup_cosine_annealing_lr(0.035, |
|
|
|
step_size, |
|
|
|
config.warmup_epochs, |
|
|
|
50, |
|
|
|
config.T_max, |
|
|
|
config.eta_min)) |
|
|
|
else: |
|
|
|
lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max, |
|
|
|
warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size, |
|
|
|
)) |
|
|
|
lr = Tensor(warmup_cosine_annealing_lr(0.035, |
|
|
|
step_size, |
|
|
|
config.warmup_epochs, |
|
|
|
50, |
|
|
|
config.T_max, |
|
|
|
config.eta_min)) |
|
|
|
opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), lr, |
|
|
|
config.momentum, damping, config.frequency, |
|
|
|
filter(lambda x: 'matrix_A' in x.name, net.get_parameters()), |
|
|
|
|