
learning_rate_schedule.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Learning rate schedule."""
import math

from ..common import dtype as mstype
from ..ops import operations as P
from .cell import Cell
from .._checkparam import Validator as validator

class LearningRateSchedule(Cell):
    """Basic class of learning rate schedule."""
    def __init__(self):
        super(LearningRateSchedule, self).__init__()

    def construct(self, global_step):
        """
        Defines the computation to get the current learning rate.

        This method must be overridden by all subclasses.

        Note:
            The output must be a scalar Tensor.

        Inputs:
            Tensor. The current step number.
        """
        raise NotImplementedError


def _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, cls_name):
    validator.check_positive_int(decay_steps, 'decay_steps', cls_name)
    validator.check_positive_float(learning_rate, 'learning_rate', cls_name)
    validator.check_is_float(learning_rate, 'learning_rate', cls_name)
    validator.check_positive_float(decay_rate, 'decay_rate', cls_name)
    validator.check_is_float(decay_rate, 'decay_rate', cls_name)
    validator.check_value_type('is_stair', is_stair, [bool], cls_name)

class ExponentialDecayLR(LearningRateSchedule):
    r"""
    Calculates the learning rate based on the exponential decay function.

    For the i-th step, the formula for computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = learning\_rate * decay\_rate^{p}

    Where:

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is:

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of the learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate the decayed learning rate.
        is_stair (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        Tensor. The current step number.

    Outputs:
        Tensor. The learning rate value for the current step.

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> exponential_decay_lr = nn.ExponentialDecayLR(learning_rate, decay_rate, decay_steps)
        >>> result = exponential_decay_lr(global_step)
        >>> print(result)
        0.09486833
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(ExponentialDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.pow = P.Pow()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32) / self.decay_steps
        if self.is_stair:
            p = P.Floor()(p)
        return self.learning_rate * self.pow(self.decay_rate, p)
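
# Hand check of the ExponentialDecayLR doctest above: with learning_rate=0.1,
# decay_rate=0.9, decay_steps=4 and global_step=2 (is_stair=False),
# p = 2 / 4 = 0.5, so the schedule yields 0.1 * 0.9 ** 0.5 ≈ 0.09486833.
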
class NaturalExpDecayLR(LearningRateSchedule):
    r"""
    Calculates the learning rate based on the natural exponential decay function.

    For the i-th step, the formula for computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = learning\_rate * e^{-decay\_rate * p}

    Where:

    .. math::
        p = current\_step

    If `is_stair` is True, the formula is:

    .. math::
        p = decay\_steps * floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of the learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate the decayed learning rate.
        is_stair (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        Tensor. The current step number.

    Outputs:
        Tensor. The learning rate value for the current step.

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> natural_exp_decay_lr = nn.NaturalExpDecayLR(learning_rate, decay_rate, decay_steps, True)
        >>> result = natural_exp_decay_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(NaturalExpDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.math_e = math.e
        self.pow = P.Pow()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32)
        if self.is_stair:
            p = P.FloorDiv()(p, self.decay_steps) * self.decay_steps
        return self.learning_rate * self.pow(self.math_e, -self.decay_rate * p)
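
# Hand check of the NaturalExpDecayLR doctest above: with is_stair=True,
# decay_steps=4 and global_step=2, p = 4 * floor(2 / 4) = 0, so the schedule
# yields 0.1 * e ** (-0.9 * 0) = 0.1, i.e. the learning rate is unchanged
# until global_step reaches a multiple of decay_steps.
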
class InverseDecayLR(LearningRateSchedule):
    r"""
    Calculates the learning rate based on the inverse-time decay function.

    For the i-th step, the formula for computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = learning\_rate / (1 + decay\_rate * p)

    Where:

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is:

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of the learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate the decayed learning rate.
        is_stair (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        Tensor. The current step number.

    Outputs:
        Tensor. The learning rate value for the current step.

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> inverse_decay_lr = nn.InverseDecayLR(learning_rate, decay_rate, decay_steps, True)
        >>> result = inverse_decay_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(InverseDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32) / self.decay_steps
        if self.is_stair:
            p = P.Floor()(p)
        return self.learning_rate / (1 + self.decay_rate * p)
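
# Hand check of the InverseDecayLR doctest above: with is_stair=True,
# global_step=2 and decay_steps=4, p = floor(2 / 4) = 0, so the schedule
# yields 0.1 / (1 + 0.9 * 0) = 0.1.
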
class CosineDecayLR(LearningRateSchedule):
    r"""
    Calculates the learning rate based on the cosine decay function.

    For the i-th step, the formula for computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = min\_learning\_rate + 0.5 * (max\_learning\_rate - min\_learning\_rate) *
        (1 + cos(\frac{current\_step}{decay\_steps}\pi))

    Args:
        min_lr (float): The minimum value of the learning rate.
        max_lr (float): The maximum value of the learning rate.
        decay_steps (int): A value used to calculate the decayed learning rate.

    Inputs:
        Tensor. The current step number.

    Outputs:
        Tensor. The learning rate value for the current step.

    Examples:
        >>> min_lr = 0.01
        >>> max_lr = 0.1
        >>> decay_steps = 4
        >>> global_steps = Tensor(2, mstype.int32)
        >>> cosine_decay_lr = nn.CosineDecayLR(min_lr, max_lr, decay_steps)
        >>> result = cosine_decay_lr(global_steps)
        >>> print(result)
        0.055
    """
    def __init__(self, min_lr, max_lr, decay_steps):
        super(CosineDecayLR, self).__init__()
        if not isinstance(min_lr, float):
            raise TypeError("min_lr must be float.")
        validator.check_non_negative_float(min_lr, "min_lr", self.cls_name)
        validator.check_positive_float(max_lr, 'max_lr', self.cls_name)
        validator.check_is_float(max_lr, 'max_lr', self.cls_name)
        validator.check_positive_int(decay_steps, "decay_steps", self.cls_name)
        if min_lr >= max_lr:
            raise ValueError('`max_lr` should be greater than `min_lr`.')
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.decay_steps = decay_steps
        self.math_pi = math.pi
        self.delta = 0.5 * (max_lr - min_lr)
        self.cos = P.Cos()
        self.min = P.Minimum()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(self.min(global_step, self.decay_steps), mstype.float32)
        return self.min_lr + self.delta * (1.0 + self.cos(self.math_pi * p / self.decay_steps))
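
# Hand check of the CosineDecayLR doctest above: global_step is clamped to
# decay_steps, so p = min(2, 4) = 2 and the schedule yields
# 0.01 + 0.5 * (0.1 - 0.01) * (1 + cos(pi * 2 / 4)) = 0.01 + 0.045 = 0.055.
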
class PolynomialDecayLR(LearningRateSchedule):
    r"""
    Calculates the learning rate based on the polynomial decay function.

    For the i-th step, the formula for computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) *
        (1 - tmp\_step / tmp\_decay\_steps)^{power} + end\_learning\_rate

    Where:

    .. math::
        tmp\_step = min(current\_step, decay\_steps)

    If `update_decay_steps` is true, the value of `tmp_decay_steps` is updated every `decay_steps` steps. The formula is:

    .. math::
        tmp\_decay\_steps = decay\_steps * ceil(current\_step / decay\_steps)

    Args:
        learning_rate (float): The initial value of the learning rate.
        end_learning_rate (float): The end value of the learning rate.
        decay_steps (int): A value used to calculate the decayed learning rate.
        power (float): A value used to calculate the decayed learning rate. This parameter must be greater than 0.
        update_decay_steps (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        Tensor. The current step number.

    Outputs:
        Tensor. The learning rate value for the current step.

    Examples:
        >>> learning_rate = 0.1
        >>> end_learning_rate = 0.01
        >>> decay_steps = 4
        >>> power = 0.5
        >>> global_step = Tensor(2, mstype.int32)
        >>> polynomial_decay_lr = nn.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
        >>> result = polynomial_decay_lr(global_step)
        >>> print(result)
        0.07363961
    """
    def __init__(self, learning_rate, end_learning_rate, decay_steps, power, update_decay_steps=False):
        super(PolynomialDecayLR, self).__init__()
        validator.check_positive_float(learning_rate, 'learning_rate', self.cls_name)
        validator.check_is_float(learning_rate, 'learning_rate', self.cls_name)
        if not isinstance(end_learning_rate, float):
            raise TypeError("end_learning_rate must be float.")
        validator.check_non_negative_float(end_learning_rate, "end_learning_rate", self.cls_name)
        validator.check_positive_int(decay_steps, 'decay_steps', self.cls_name)
        validator.check_value_type('update_decay_steps', update_decay_steps, [bool], self.cls_name)
        validator.check_positive_float(power, 'power', self.cls_name)
        validator.check_is_float(power, 'power', self.cls_name)
        self.decay_steps = decay_steps
        self.start_learning_rate = learning_rate
        self.end_learning_rate = end_learning_rate
        self.diff_learning_rate = learning_rate - end_learning_rate
        self.power = power
        self.update_decay_steps = update_decay_steps
        self.pow = P.Pow()
        self.ceil = P.Ceil()
        self.min = P.Minimum()
        self.max = P.Maximum()

    def construct(self, global_step):
        tmp_global_step = P.Cast()(global_step, mstype.float32)
        tmp_decay_step = self.decay_steps
        if self.update_decay_steps:
            tmp_decay_step = tmp_decay_step * self.max(self.ceil(tmp_global_step / tmp_decay_step), 1)
        else:
            tmp_global_step = self.min(tmp_global_step, tmp_decay_step)
        p = tmp_global_step / tmp_decay_step
        lr = self.diff_learning_rate * self.pow(1.0 - p, self.power) + self.end_learning_rate
        return lr
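
# Hand check of the PolynomialDecayLR doctest above: tmp_step = min(2, 4) = 2,
# p = 2 / 4 = 0.5, so the schedule yields
# (0.1 - 0.01) * (1 - 0.5) ** 0.5 + 0.01 ≈ 0.07363961.
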
class WarmUpLR(LearningRateSchedule):
    r"""
    Gets the learning rate during warm-up.

    For the i-th step, the formula for computing warmup_learning_rate[i] is:

    .. math::
        warmup\_learning\_rate[i] = learning\_rate * tmp\_step / warmup\_steps

    Where:

    .. math::
        tmp\_step = min(current\_step, warmup\_steps)

    Args:
        learning_rate (float): The initial value of the learning rate.
        warmup_steps (int): The warm-up steps of the learning rate.

    Inputs:
        Tensor. The current step number.

    Outputs:
        Tensor. The learning rate value for the current step.

    Examples:
        >>> learning_rate = 0.1
        >>> warmup_steps = 2
        >>> global_step = Tensor(2, mstype.int32)
        >>> warmup_lr = nn.WarmUpLR(learning_rate, warmup_steps)
        >>> result = warmup_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, warmup_steps):
        super(WarmUpLR, self).__init__()
        if not isinstance(learning_rate, float):
            raise TypeError("learning_rate must be float.")
        validator.check_non_negative_float(learning_rate, "learning_rate", self.cls_name)
        validator.check_positive_int(warmup_steps, 'warmup_steps', self.cls_name)
        self.warmup_steps = warmup_steps
        self.learning_rate = learning_rate
        self.min = P.Minimum()
        self.cast = P.Cast()

    def construct(self, global_step):
        warmup_percent = self.cast(self.min(global_step, self.warmup_steps), mstype.float32) / self.warmup_steps
        return self.learning_rate * warmup_percent
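
# Hand check of the WarmUpLR doctest above: warmup_percent = min(2, 2) / 2 = 1.0,
# so the schedule yields 0.1 * 1.0 = 0.1, i.e. warm-up is complete at step 2.
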
__all__ = [
    'ExponentialDecayLR',
    'NaturalExpDecayLR',
    'InverseDecayLR',
    'CosineDecayLR',
    'PolynomialDecayLR',
    'WarmUpLR'
]
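
# Minimal usage sketch, assuming a MindSpore optimizer such as nn.Momentum
# that accepts a LearningRateSchedule instance as its learning_rate argument:
#
#     lr_schedule = nn.ExponentialDecayLR(0.1, 0.9, decay_steps=4, is_stair=True)
#     optim = nn.Momentum(net.trainable_params(), learning_rate=lr_schedule, momentum=0.9)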