
lr_scheduler.py 5.2 kB

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Learning rate scheduler."""
import math
from collections import Counter

import numpy as np

def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Linear learning rate."""
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    lr = float(init_lr) + lr_inc * current_step
    return lr

def warmup_step_lr(lr, lr_epochs, steps_per_epoch, warmup_epochs, max_epoch, gamma=0.1):
    """Warmup step learning rate."""
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    milestones = lr_epochs
    milestones_steps = []
    for milestone in milestones:
        milestones_step = milestone * steps_per_epoch
        milestones_steps.append(milestones_step)

    lr_each_step = []
    lr = base_lr
    milestones_steps_counter = Counter(milestones_steps)
    for i in range(total_steps):
        if i < warmup_steps:
            lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            # Decay by gamma at each milestone step; the Counter yields 0 for
            # non-milestone steps, so gamma**0 == 1 leaves lr unchanged there.
            lr = lr * gamma**milestones_steps_counter[i]
        lr_each_step.append(lr)

    return np.array(lr_each_step).astype(np.float32)

def multi_step_lr(lr, milestones, steps_per_epoch, max_epoch, gamma=0.1):
    """Multi-step learning rate: decay by gamma at each epoch in milestones, no warmup."""
    return warmup_step_lr(lr, milestones, steps_per_epoch, 0, max_epoch, gamma=gamma)

def step_lr(lr, epoch_size, steps_per_epoch, max_epoch, gamma=0.1):
    """Step learning rate: decay by gamma every epoch_size epochs."""
    lr_epochs = []
    for i in range(1, max_epoch):
        if i % epoch_size == 0:
            lr_epochs.append(i)
    return multi_step_lr(lr, lr_epochs, steps_per_epoch, max_epoch, gamma=gamma)

def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0):
    """Cosine annealing learning rate."""
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)

    lr_each_step = []
    for i in range(total_steps):
        last_epoch = i // steps_per_epoch
        if i < warmup_steps:
            lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            lr = eta_min + (base_lr - eta_min) * (1. + math.cos(math.pi * last_epoch / T_max)) / 2
        lr_each_step.append(lr)

    return np.array(lr_each_step).astype(np.float32)

def warmup_cosine_annealing_lr_V2(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0):
    """Cosine annealing learning rate V2: two cosine phases after warmup."""
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)

    last_lr = 0
    last_epoch_V1 = 0

    T_max_V2 = int(max_epoch * 1 / 3)

    lr_each_step = []
    for i in range(total_steps):
        last_epoch = i // steps_per_epoch
        if i < warmup_steps:
            lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            if i < total_steps * 2 / 3:
                # Phase one: standard cosine annealing with period T_max over
                # the first two thirds of training.
                lr = eta_min + (base_lr - eta_min) * (1. + math.cos(math.pi * last_epoch / T_max)) / 2
                last_lr = lr
                last_epoch_V1 = last_epoch
            else:
                # Phase two: restart the cosine from the last phase-one lr with
                # the shorter period T_max_V2 over the final third of training.
                base_lr = last_lr
                last_epoch = last_epoch - last_epoch_V1
                lr = eta_min + (base_lr - eta_min) * (1. + math.cos(math.pi * last_epoch / T_max_V2)) / 2
        lr_each_step.append(lr)

    return np.array(lr_each_step).astype(np.float32)

def warmup_cosine_annealing_lr_sample(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0):
    """Warmup cosine annealing learning rate."""
    start_sample_epoch = 60
    step_sample = 2
    tobe_sampled_epoch = 60
    end_sampled_epoch = start_sample_epoch + step_sample * tobe_sampled_epoch
    max_sampled_epoch = max_epoch + tobe_sampled_epoch
    T_max = max_sampled_epoch

    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    total_sampled_steps = int(max_sampled_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)

    lr_each_step = []
    for i in range(total_sampled_steps):
        last_epoch = i // steps_per_epoch
        if last_epoch in range(start_sample_epoch, end_sampled_epoch, step_sample):
            # Drop every other epoch in the sampled window, so that exactly
            # tobe_sampled_epoch epochs are skipped and total_steps entries remain.
            continue
        if i < warmup_steps:
            lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            lr = eta_min + (base_lr - eta_min) * (1. + math.cos(math.pi * last_epoch / T_max)) / 2
        lr_each_step.append(lr)

    assert total_steps == len(lr_each_step)
    return np.array(lr_each_step).astype(np.float32)
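
Each scheduler above returns the full learning-rate schedule as a per-step float32 numpy array, which can then be handed to an optimizer that accepts a per-step schedule. A minimal usage sketch, assuming this file is importable as lr_scheduler; the concrete values (batches per epoch, milestones, base lr) are illustrative, not taken from the file:

    from lr_scheduler import warmup_step_lr, warmup_cosine_annealing_lr

    steps_per_epoch = 100   # hypothetical: 100 batches per epoch
    max_epoch = 90          # hypothetical: 90 training epochs

    # Step decay: 5 warmup epochs, then multiply lr by 0.1 at epochs 30 and 60.
    step_schedule = warmup_step_lr(0.1, [30, 60], steps_per_epoch,
                                   warmup_epochs=5, max_epoch=max_epoch, gamma=0.1)

    # Cosine annealing toward eta_min=0 with the same warmup.
    cosine_schedule = warmup_cosine_annealing_lr(0.1, steps_per_epoch,
                                                 warmup_epochs=5, max_epoch=max_epoch,
                                                 T_max=max_epoch)

    assert step_schedule.shape == (steps_per_epoch * max_epoch,)
    print(step_schedule[0], step_schedule[-1])    # warmup start vs. final decayed lr
    print(cosine_schedule[-1])                    # close to eta_min at the end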