You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dynamic_lr.py 15 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Dynamic Learning Rate"""
  16. import math
  17. from mindspore._checkparam import Validator as validator
  18. from mindspore._checkparam import Rel
  19. def piecewise_constant_lr(milestone, learning_rates):
  20. r"""
  21. Get piecewise constant learning rate.
  22. Calculate learning rate by given `milestone` and `learning_rates`. Let the value of `milestone` be
  23. :math:`(M_1, M_2, ..., M_N)` and the value of `learning_rates` be :math:`(x_1, x_2, ..., x_N)`. N is the length of
  24. `milestone`. Let the output learning rate be `y`.
  25. .. math::
  26. y[i] = x_t,\ for\ i \in [M_{t-1}, M_t)
  27. Args:
  28. milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list.
  29. Every element is a milestone step, and must be greater than 0.
  30. learning_rates (Union[list[float], tuple[float]]): A list of learning rates.
  31. Returns:
  32. list[float]. The size of list is :math:`M_N`.
  33. Examples:
  34. >>> milestone = [2, 5, 10]
  35. >>> learning_rates = [0.1, 0.05, 0.01]
  36. >>> piecewise_constant_lr(milestone, learning_rates)
  37. [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
  38. """
  39. validator.check_value_type('milestone', milestone, (tuple, list), None)
  40. validator.check_value_type('learning_rates', learning_rates, (tuple, list), None)
  41. if len(milestone) != len(learning_rates):
  42. raise ValueError('The size of `milestone` must be same with the size of `learning_rates`.')
  43. lr = []
  44. last_item = 0
  45. for i, item in enumerate(milestone):
  46. validator.check_positive_int(item, f'milestone[{i}]')
  47. validator.check_is_float(learning_rates[i], f'learning_rates[{i}]')
  48. if item < last_item:
  49. raise ValueError(f'The value of milestone[{i}] must be greater than milestone[{i - 1}]')
  50. lr += [learning_rates[i]] * (item - last_item)
  51. last_item = item
  52. return lr
  53. def _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair):
  54. validator.check_positive_int(total_step, 'total_step')
  55. validator.check_positive_int(step_per_epoch, 'step_per_epoch')
  56. validator.check_positive_int(decay_epoch, 'decay_epoch')
  57. validator.check_positive_float(learning_rate, 'learning_rate')
  58. validator.check_is_float(learning_rate, 'learning_rate')
  59. validator.check_positive_float(decay_rate, 'decay_rate')
  60. validator.check_is_float(decay_rate, 'decay_rate')
  61. validator.check_value_type('is_stair', is_stair, [bool], None)
  62. def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair=False):
  63. r"""
  64. Calculate learning rate base on exponential decay function.
  65. For the i-th step, the formula of computing decayed_learning_rate[i] is:
  66. .. math::
  67. decayed\_learning\_rate[i] = learning\_rate * decay\_rate^{\frac{current\_epoch}{decay\_epoch}}
  68. Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`.
  69. Args:
  70. learning_rate (float): The initial value of learning rate.
  71. decay_rate (float): The decay rate.
  72. total_step (int): The total number of steps.
  73. step_per_epoch (int): The number of steps in per epoch.
  74. decay_epoch (int): A value used to calculate decayed learning rate.
  75. is_stair (bool): If true, learning rate is decayed once every `decay_epoch` times. Default: False.
  76. Returns:
  77. list[float]. The size of list is `total_step`.
  78. Examples:
  79. >>> learning_rate = 0.1
  80. >>> decay_rate = 0.9
  81. >>> total_step = 6
  82. >>> step_per_epoch = 2
  83. >>> decay_epoch = 1
  84. >>> exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch)
  85. [0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002]
  86. """
  87. _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
  88. lr = []
  89. for i in range(total_step):
  90. if is_stair:
  91. lr.append(learning_rate * decay_rate ** math.floor(math.floor(i / step_per_epoch) / decay_epoch))
  92. else:
  93. lr.append(learning_rate * decay_rate ** (math.floor(i / step_per_epoch) / decay_epoch))
  94. return lr
  95. def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair=False):
  96. r"""
  97. Calculate learning rate base on natural exponential decay function.
  98. For the i-th step, the formula of computing decayed_learning_rate[i] is:
  99. .. math::
  100. decayed\_learning\_rate[i] = learning\_rate * e^{-decay\_rate * current\_epoch}
  101. Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`.
  102. Args:
  103. learning_rate (float): The initial value of learning rate.
  104. decay_rate (float): The decay rate.
  105. total_step (int): The total number of steps.
  106. step_per_epoch (int): The number of steps in per epoch.
  107. decay_epoch (int): A value used to calculate decayed learning rate.
  108. is_stair (bool): If true, learning rate is decayed once every `decay_epoch` times. Default: False.
  109. Returns:
  110. list[float]. The size of list is `total_step`.
  111. Examples:
  112. >>> learning_rate = 0.1
  113. >>> decay_rate = 0.9
  114. >>> total_step = 6
  115. >>> step_per_epoch = 2
  116. >>> decay_epoch = 2
  117. >>> natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
  118. [0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657]
  119. """
  120. _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
  121. function = lambda x, y: x
  122. if is_stair:
  123. function = lambda x, y: math.floor(x / y) * y
  124. lr = []
  125. for i in range(total_step):
  126. lr.append(learning_rate * math.e ** (-decay_rate * function(math.floor(i / step_per_epoch), decay_epoch)))
  127. return lr
  128. def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair=False):
  129. r"""
  130. Calculate learning rate base on inverse-time decay function.
  131. For the i-th step, the formula of computing decayed_learning_rate[i] is:
  132. .. math::
  133. decayed\_learning\_rate[i] = learning\_rate / (1 + decay\_rate * current\_epoch / decay\_epoch)
  134. Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`.
  135. Args:
  136. learning_rate (float): The initial value of learning rate.
  137. decay_rate (float): The decay rate.
  138. total_step (int): The total number of steps.
  139. step_per_epoch (int): The number of steps in per epoch.
  140. decay_epoch (int): A value used to calculate decayed learning rate.
  141. is_stair (bool): If true, learning rate is decayed once every `decay_epoch` times. Default: False.
  142. Returns:
  143. list[float]. The size of list is `total_step`.
  144. Examples:
  145. >>> learning_rate = 0.1
  146. >>> decay_rate = 0.5
  147. >>> total_step = 6
  148. >>> step_per_epoch = 1
  149. >>> decay_epoch = 1
  150. >>> inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
  151. [0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574]
  152. """
  153. _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
  154. lr = []
  155. for i in range(total_step):
  156. if is_stair:
  157. lr.append(learning_rate / (1 + decay_rate * math.floor(math.floor(i / step_per_epoch) / decay_epoch)))
  158. else:
  159. lr.append(learning_rate / (1 + decay_rate * math.floor(i / step_per_epoch) / decay_epoch))
  160. return lr
  161. def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch):
  162. r"""
  163. Calculate learning rate base on cosine decay function.
  164. For the i-th step, the formula of computing decayed_learning_rate[i] is:
  165. .. math::
  166. decayed\_learning\_rate[i] = min\_learning\_rate + 0.5 * (max\_learning\_rate - min\_learning\_rate) *
  167. (1 + cos(\frac{current\_epoch}{decay\_epoch}\pi))
  168. Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`.
  169. Args:
  170. min_lr (float): The minimum value of learning rate.
  171. max_lr (float): The maximum value of learning rate.
  172. total_step (int): The total number of steps.
  173. step_per_epoch (int): The number of steps in per epoch.
  174. decay_epoch (int): A value used to calculate decayed learning rate.
  175. Returns:
  176. list[float]. The size of list is `total_step`.
  177. Examples:
  178. >>> min_lr = 0.01
  179. >>> max_lr = 0.1
  180. >>> total_step = 6
  181. >>> step_per_epoch = 2
  182. >>> decay_epoch = 2
  183. >>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
  184. [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01]
  185. """
  186. if not isinstance(min_lr, float):
  187. raise TypeError("min_lr must be float.")
  188. validator.check_number_range("min_lr", min_lr, 0.0, float("inf"), Rel.INC_LEFT, None)
  189. validator.check_positive_float(max_lr, 'max_lr')
  190. validator.check_is_float(max_lr, 'max_lr')
  191. validator.check_positive_int(total_step, 'total_step')
  192. validator.check_positive_int(step_per_epoch, 'step_per_epoch')
  193. validator.check_positive_int(decay_epoch, 'decay_epoch')
  194. if min_lr >= max_lr:
  195. raise ValueError('`max_lr` should be greater than `min_lr`.')
  196. delta = 0.5 * (max_lr - min_lr)
  197. lr = []
  198. for i in range(total_step):
  199. tmp_epoch = min(math.floor(i / step_per_epoch), decay_epoch)
  200. lr.append(min_lr + delta * (1 + math.cos(math.pi * tmp_epoch / decay_epoch)))
  201. return lr
  202. def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power,
  203. update_decay_epoch=False):
  204. r"""
  205. Calculate learning rate base on polynomial decay function.
  206. For the i-th step, the formula of computing decayed_learning_rate[i] is:
  207. .. math::
  208. decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) *
  209. (1 - tmp\_epoch / tmp\_decay\_epoch)^{power} + end\_learning\_rate
  210. Where:
  211. .. math::
  212. tmp\_epoch = min(current\_epoch, decay\_epoch)
  213. .. math::
  214. current\_epoch=floor(\frac{i}{step\_per\_epoch})
  215. .. math::
  216. tmp\_decay\_epoch = decay\_epoch
  217. If `update_decay_epoch` is true, update the value of `tmp_decay_epoch` every epoch. The formula is:
  218. .. math::
  219. tmp\_decay\_epoch = decay\_epoch * ceil(current\_epoch / decay\_epoch)
  220. Args:
  221. learning_rate (float): The initial value of learning rate.
  222. end_learning_rate (float): The end value of learning rate.
  223. total_step (int): The total number of steps.
  224. step_per_epoch (int): The number of steps in per epoch.
  225. decay_epoch (int): A value used to calculate decayed learning rate.
  226. power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0.
  227. update_decay_epoch (bool): If true, update `decay_epoch`. Default: False.
  228. Returns:
  229. list[float]. The size of list is `total_step`.
  230. Examples:
  231. >>> learning_rate = 0.1
  232. >>> end_learning_rate = 0.01
  233. >>> total_step = 6
  234. >>> step_per_epoch = 2
  235. >>> decay_epoch = 2
  236. >>> power = 0.5
  237. >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
  238. [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01]
  239. """
  240. validator.check_positive_float(learning_rate, 'learning_rate')
  241. validator.check_is_float(learning_rate, 'learning_rate')
  242. if not isinstance(end_learning_rate, float):
  243. raise TypeError("end_learning_rate must be float.")
  244. validator.check_number_range("end_learning_rate", end_learning_rate, 0.0, float("inf"), Rel.INC_LEFT, None)
  245. validator.check_positive_float(power, 'power')
  246. validator.check_is_float(power, 'power')
  247. validator.check_positive_int(total_step, 'total_step')
  248. validator.check_positive_int(step_per_epoch, 'step_per_epoch')
  249. validator.check_positive_int(decay_epoch, 'decay_epoch')
  250. validator.check_value_type('update_decay_epoch', update_decay_epoch, [bool], None)
  251. origin_decay_epoch = decay_epoch
  252. function = lambda x, y: (x, min(x, y))
  253. if update_decay_epoch:
  254. function = lambda x, y: (origin_decay_epoch * max(math.ceil(y / origin_decay_epoch), 1), y)
  255. lr = []
  256. delta = learning_rate - end_learning_rate
  257. for i in range(total_step):
  258. current_epoch = math.floor(i / step_per_epoch)
  259. decay_epoch, tmp_epoch = function(decay_epoch, current_epoch)
  260. lr.append(delta * (1 - tmp_epoch / decay_epoch) ** power + end_learning_rate)
  261. return lr
  262. def warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch):
  263. r"""
  264. Get learning rate warming up.
  265. For the i-th step, the formula of computing warmup_learning_rate[i] is:
  266. .. math::
  267. warmup\_learning\_rate[i] = learning\_rate * tmp\_epoch / tmp\_warmup\_epoch
  268. Where :math:`tmp\_epoch=min(current\_epoch, warmup\_epoch),\ current\_epoch=floor(\frac{i}{step\_per\_epoch})`
  269. Args:
  270. learning_rate (float): The initial value of learning rate.
  271. warmup_steps (int): The warm up steps of learning rate.
  272. Inputs:
  273. Tensor. The current step number.
  274. Returns:
  275. Tensor. The learning rate value for the current step.
  276. Examples:
  277. >>> learning_rate = 0.1
  278. >>> total_step = 6
  279. >>> step_per_epoch = 2
  280. >>> warmup_epoch = 2
  281. >>> warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch)
  282. [0.0, 0.0, 0.05, 0.05, 0.1, 0.1]
  283. """
  284. if not isinstance(learning_rate, float):
  285. raise TypeError("learning_rate must be float.")
  286. validator.check_number_range("learning_rate", learning_rate, 0.0, float("inf"), Rel.INC_LEFT, None)
  287. validator.check_positive_int(warmup_epoch, 'warmup_epoch')
  288. validator.check_positive_int(total_step, 'total_step')
  289. validator.check_positive_int(step_per_epoch, 'step_per_epoch')
  290. function = lambda x, y: (x, min(x, y))
  291. lr = []
  292. for i in range(total_step):
  293. current_epoch = math.floor(i / step_per_epoch)
  294. warmup_epoch, tmp_epoch = function(warmup_epoch, current_epoch)
  295. lr.append(learning_rate * tmp_epoch/ warmup_epoch)
  296. return lr
  297. __all__ = [
  298. 'piecewise_constant_lr',
  299. 'exponential_decay_lr',
  300. 'natural_exp_decay_lr',
  301. 'inverse_decay_lr',
  302. 'cosine_decay_lr',
  303. 'polynomial_decay_lr',
  304. 'warmup_lr'
  305. ]