learning_rate_schedule.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Learning rate schedule."""
import math

from ..common import dtype as mstype
from ..ops import operations as P
from .cell import Cell
from .._checkparam import Validator as validator


class LearningRateSchedule(Cell):
    """Basic class of learning rate schedule."""
    def __init__(self):
        super(LearningRateSchedule, self).__init__()

    def construct(self, global_step):
        """
        Defines the computation to get the current learning rate.

        This method must be overridden by all subclasses.

        Note:
            The output must be a scalar Tensor.

        Inputs:
            - **global_step** (Tensor) - The current step number.

        Outputs:
            Tensor. Learning rate at current step with shape :math:`()`.
        """
        raise NotImplementedError
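

# Note: instances of LearningRateSchedule subclasses are meant to be passed as the
# `learning_rate` argument of MindSpore optimizers, which evaluate the schedule
# against their internal global step. A minimal usage sketch (assumes a constructed
# network `net`; `nn` is the mindspore.nn package):
#
#     lr = nn.ExponentialDecayLR(0.1, 0.9, decay_steps=4)
#     optimizer = nn.Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)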


def _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, cls_name):
    validator.check_positive_int(decay_steps, 'decay_steps', cls_name)
    validator.check_positive_float(learning_rate, 'learning_rate', cls_name)
    validator.check_is_float(learning_rate, 'learning_rate', cls_name)
    validator.check_positive_float(decay_rate, 'decay_rate', cls_name)
    validator.check_is_float(decay_rate, 'decay_rate', cls_name)
    validator.check_value_type('is_stair', is_stair, [bool], cls_name)


class ExponentialDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on exponential decay function.

    For the current step, the formula of computing decayed_learning_rate[current_step] is:

    .. math::
        decayed\_learning\_rate[current\_step] = learning\_rate * decay\_rate^{p}

    Where :

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is :

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        is_stair (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` or `decay_rate` is not a float.
        TypeError: If `decay_steps` is not an int or `is_stair` is not a bool.
        ValueError: If `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `decay_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> exponential_decay_lr = nn.ExponentialDecayLR(learning_rate, decay_rate, decay_steps)
        >>> result = exponential_decay_lr(global_step)
        >>> print(result)
        0.09486833
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(ExponentialDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.pow = P.Pow()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32) / self.decay_steps
        if self.is_stair:
            p = P.Floor()(p)
        return self.learning_rate * self.pow(self.decay_rate, p)
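

# A minimal pure-Python sketch of the same decay rule (the helper name is
# hypothetical, not part of the module API); useful for checking the docstring
# example off-graph.
def _exp_decay_ref(learning_rate, decay_rate, decay_steps, current_step, is_stair=False):
    p = current_step / decay_steps
    if is_stair:
        p = math.floor(p)
    # lr * rate ** p; with the example values: 0.1 * 0.9 ** 0.5 ~= 0.09486833
    return learning_rate * decay_rate ** p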


class NaturalExpDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on natural exponential decay function.

    For the current step, the formula of computing decayed_learning_rate[current_step] is:

    .. math::
        decayed\_learning\_rate[current\_step] = learning\_rate * e^{-decay\_rate * p}

    Where :

    .. math::
        p = current\_step

    If `is_stair` is True, the formula is :

    .. math::
        p = floor(\frac{current\_step}{decay\_steps}) * decay\_steps

    Args:
        learning_rate (float): The initial value of learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        is_stair (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` or `decay_rate` is not a float.
        TypeError: If `decay_steps` is not an int or `is_stair` is not a bool.
        ValueError: If `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `decay_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> natural_exp_decay_lr = nn.NaturalExpDecayLR(learning_rate, decay_rate, decay_steps, True)
        >>> result = natural_exp_decay_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(NaturalExpDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.math_e = math.e
        self.pow = P.Pow()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32)
        if self.is_stair:
            p = P.FloorDiv()(p, self.decay_steps) * self.decay_steps
        return self.learning_rate * self.pow(self.math_e, -self.decay_rate * p)
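

# A minimal pure-Python sketch mirroring construct above (hypothetical helper).
# Note that without `is_stair` the exponent uses the raw step count, while with
# `is_stair` the step is snapped down to the last multiple of `decay_steps`.
def _natural_exp_decay_ref(learning_rate, decay_rate, decay_steps, current_step, is_stair=False):
    p = float(current_step)
    if is_stair:
        p = float((current_step // decay_steps) * decay_steps)
    # With the example values (is_stair=True, step 2 of 4): 0.1 * e ** 0 == 0.1
    return learning_rate * math.exp(-decay_rate * p)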


class InverseDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on inverse-time decay function.

    For the current step, the formula of computing decayed_learning_rate[current_step] is:

    .. math::
        decayed\_learning\_rate[current\_step] = learning\_rate / (1 + decay\_rate * p)

    Where :

    .. math::
        p = \frac{current\_step}{decay\_steps}

    If `is_stair` is True, the formula is :

    .. math::
        p = floor(\frac{current\_step}{decay\_steps})

    Args:
        learning_rate (float): The initial value of learning rate.
        decay_rate (float): The decay rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        is_stair (bool): If true, the learning rate is decayed once every `decay_steps` steps. Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` or `decay_rate` is not a float.
        TypeError: If `decay_steps` is not an int or `is_stair` is not a bool.
        ValueError: If `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `decay_rate` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> learning_rate = 0.1
        >>> decay_rate = 0.9
        >>> decay_steps = 4
        >>> global_step = Tensor(2, mstype.int32)
        >>> inverse_decay_lr = nn.InverseDecayLR(learning_rate, decay_rate, decay_steps, True)
        >>> result = inverse_decay_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, decay_rate, decay_steps, is_stair=False):
        super(InverseDecayLR, self).__init__()
        _check_inputs(learning_rate, decay_rate, decay_steps, is_stair, self.cls_name)
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps
        self.is_stair = is_stair
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(global_step, mstype.float32) / self.decay_steps
        if self.is_stair:
            p = P.Floor()(p)
        return self.learning_rate / (1 + self.decay_rate * p)
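

# A minimal pure-Python sketch of inverse-time decay (hypothetical helper).
def _inverse_decay_ref(learning_rate, decay_rate, decay_steps, current_step, is_stair=False):
    p = current_step / decay_steps
    if is_stair:
        p = math.floor(p)
    # With the example values (is_stair=True, step 2 of 4): 0.1 / (1 + 0.9 * 0) == 0.1
    return learning_rate / (1 + decay_rate * p)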


class CosineDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on cosine decay function.

    For the current step, the formula of computing decayed_learning_rate[current_step] is:

    .. math::
        decayed\_learning\_rate[current\_step] = min\_lr + 0.5 * (max\_lr - min\_lr) *
        (1 + cos(\frac{current\_step}{decay\_steps}\pi))

    Where :math:`current\_step = min(current\_step, decay\_steps)`.

    Args:
        min_lr (float): The minimum value of learning rate.
        max_lr (float): The maximum value of learning rate.
        decay_steps (int): A value used to calculate decayed learning rate.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `min_lr` or `max_lr` is not a float.
        TypeError: If `decay_steps` is not an int.
        ValueError: If `min_lr` is less than 0 or `decay_steps` is less than 1.
        ValueError: If `max_lr` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> min_lr = 0.01
        >>> max_lr = 0.1
        >>> decay_steps = 4
        >>> global_steps = Tensor(2, mstype.int32)
        >>> cosine_decay_lr = nn.CosineDecayLR(min_lr, max_lr, decay_steps)
        >>> result = cosine_decay_lr(global_steps)
        >>> print(result)
        0.055
    """
    def __init__(self, min_lr, max_lr, decay_steps):
        super(CosineDecayLR, self).__init__()
        if not isinstance(min_lr, float):
            raise TypeError("For 'CosineDecayLR', the argument 'min_lr' must be type of float, "
                            "but got 'min_lr' type: {}.".format(type(min_lr)))
        validator.check_non_negative_float(min_lr, "min_lr", self.cls_name)
        validator.check_positive_float(max_lr, 'max_lr', self.cls_name)
        validator.check_is_float(max_lr, 'max_lr', self.cls_name)
        validator.check_positive_int(decay_steps, "decay_steps", self.cls_name)
        if min_lr >= max_lr:
            raise ValueError("For 'CosineDecayLR', the 'max_lr' should be greater than the 'min_lr', "
                             "but got 'max_lr' value: {}, 'min_lr' value: {}.".format(max_lr, min_lr))
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.decay_steps = decay_steps
        self.math_pi = math.pi
        self.delta = 0.5 * (max_lr - min_lr)
        self.cos = P.Cos()
        self.min = P.Minimum()
        self.cast = P.Cast()

    def construct(self, global_step):
        p = self.cast(self.min(global_step, self.decay_steps), mstype.float32)
        return self.min_lr + self.delta * (1.0 + self.cos(self.math_pi * p / self.decay_steps))
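

# A minimal pure-Python sketch of the cosine schedule (hypothetical helper). The
# step is clamped to `decay_steps`, so the rate bottoms out at `min_lr` and stays
# there.
def _cosine_decay_ref(min_lr, max_lr, decay_steps, current_step):
    p = min(current_step, decay_steps) / decay_steps
    # With the example values: 0.01 + 0.045 * (1 + cos(pi / 2)) == 0.055
    return min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * p))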


class PolynomialDecayLR(LearningRateSchedule):
    r"""
    Calculates learning rate based on polynomial decay function.

    For the current step, the formula of computing decayed_learning_rate[current_step] is:

    .. math::
        decayed\_learning\_rate[current\_step] = (learning\_rate - end\_learning\_rate) *
        (1 - tmp\_step / tmp\_decay\_steps)^{power} + end\_learning\_rate

    Where :

    .. math::
        tmp\_step = min(current\_step, decay\_steps)

    If `update_decay_steps` is true, the value of `tmp_decay_steps` is updated every `decay_steps` steps.
    The formula is :

    .. math::
        tmp\_decay\_steps = decay\_steps * ceil(current\_step / decay\_steps)

    Args:
        learning_rate (float): The initial value of learning rate.
        end_learning_rate (float): The end value of learning rate.
        decay_steps (int): A value used to calculate decayed learning rate.
        power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0.
        update_decay_steps (bool): If true, the learning rate is decayed once every `decay_steps` steps.
            Default: False.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate`, `end_learning_rate` or `power` is not a float.
        TypeError: If `decay_steps` is not an int or `update_decay_steps` is not a bool.
        ValueError: If `end_learning_rate` is less than 0 or `decay_steps` is less than 1.
        ValueError: If `learning_rate` or `power` is less than or equal to 0.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> learning_rate = 0.1
        >>> end_learning_rate = 0.01
        >>> decay_steps = 4
        >>> power = 0.5
        >>> global_step = Tensor(2, mstype.int32)
        >>> polynomial_decay_lr = nn.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
        >>> result = polynomial_decay_lr(global_step)
        >>> print(result)
        0.07363961
    """
    def __init__(self, learning_rate, end_learning_rate, decay_steps, power, update_decay_steps=False):
        super(PolynomialDecayLR, self).__init__()
        validator.check_positive_float(learning_rate, 'learning_rate', self.cls_name)
        validator.check_is_float(learning_rate, 'learning_rate', self.cls_name)
        if not isinstance(end_learning_rate, float):
            raise TypeError("For 'PolynomialDecayLR', the argument 'end_learning_rate' "
                            "must be type of float, but got 'end_learning_rate' type: {}."
                            .format(type(end_learning_rate)))
        validator.check_non_negative_float(end_learning_rate, "end_learning_rate", self.cls_name)
        validator.check_positive_int(decay_steps, 'decay_steps', self.cls_name)
        validator.check_value_type('update_decay_steps', update_decay_steps, [bool], self.cls_name)
        validator.check_positive_float(power, 'power', self.cls_name)
        validator.check_is_float(power, 'power', self.cls_name)
        self.decay_steps = decay_steps
        self.start_learning_rate = learning_rate
        self.end_learning_rate = end_learning_rate
        self.diff_learning_rate = learning_rate - end_learning_rate
        self.power = power
        self.update_decay_steps = update_decay_steps
        self.pow = P.Pow()
        self.ceil = P.Ceil()
        self.min = P.Minimum()
        self.max = P.Maximum()

    def construct(self, global_step):
        tmp_global_step = P.Cast()(global_step, mstype.float32)
        tmp_decay_step = self.decay_steps
        if self.update_decay_steps:
            # Stretch the decay horizon to the next multiple of decay_steps.
            tmp_decay_step = tmp_decay_step * self.max(self.ceil(tmp_global_step / tmp_decay_step), 1)
        else:
            # Clamp the step so the rate stays at end_learning_rate after decay_steps.
            tmp_global_step = self.min(tmp_global_step, tmp_decay_step)
        p = tmp_global_step / tmp_decay_step
        lr = self.diff_learning_rate * self.pow(1.0 - p, self.power) + self.end_learning_rate
        return lr
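

# A minimal pure-Python sketch of polynomial decay (hypothetical helper). With
# `update_decay_steps=True` the horizon stretches to the next multiple of
# `decay_steps`, giving a restarting (saw-tooth) schedule instead of a clamp.
def _polynomial_decay_ref(learning_rate, end_learning_rate, decay_steps, power,
                          current_step, update_decay_steps=False):
    tmp_decay_steps = decay_steps
    tmp_step = current_step
    if update_decay_steps:
        tmp_decay_steps = decay_steps * max(math.ceil(current_step / decay_steps), 1)
    else:
        tmp_step = min(current_step, decay_steps)
    p = tmp_step / tmp_decay_steps
    # With the example values: 0.09 * (1 - 0.5) ** 0.5 + 0.01 ~= 0.07363961
    return (learning_rate - end_learning_rate) * (1.0 - p) ** power + end_learning_rate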


class WarmUpLR(LearningRateSchedule):
    r"""
    Linearly warms up the learning rate.

    For the current step, the formula of computing warmup_learning_rate[current_step] is:

    .. math::
        warmup\_learning\_rate[current\_step] = learning\_rate * tmp\_step / warmup\_steps

    Where :

    .. math::
        tmp\_step = min(current\_step, warmup\_steps)

    Args:
        learning_rate (float): The initial value of learning rate.
        warmup_steps (int): The warm up steps of learning rate.

    Inputs:
        - **global_step** (Tensor) - The current step number.

    Outputs:
        Tensor. The learning rate value for the current step with shape :math:`()`.

    Raises:
        TypeError: If `learning_rate` is not a float.
        TypeError: If `warmup_steps` is not an int.
        ValueError: If `warmup_steps` is less than 1.
        ValueError: If `learning_rate` is less than 0.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> learning_rate = 0.1
        >>> warmup_steps = 2
        >>> global_step = Tensor(2, mstype.int32)
        >>> warmup_lr = nn.WarmUpLR(learning_rate, warmup_steps)
        >>> result = warmup_lr(global_step)
        >>> print(result)
        0.1
    """
    def __init__(self, learning_rate, warmup_steps):
        super(WarmUpLR, self).__init__()
        if not isinstance(learning_rate, float):
            raise TypeError("For 'WarmUpLR', the argument 'learning_rate' must be type of float, "
                            "but got 'learning_rate' type: {}.".format(type(learning_rate)))
        validator.check_non_negative_float(learning_rate, "learning_rate", self.cls_name)
        validator.check_positive_int(warmup_steps, 'warmup_steps', self.cls_name)
        self.warmup_steps = warmup_steps
        self.learning_rate = learning_rate
        self.min = P.Minimum()
        self.cast = P.Cast()

    def construct(self, global_step):
        warmup_percent = self.cast(self.min(global_step, self.warmup_steps), mstype.float32) / self.warmup_steps
        return self.learning_rate * warmup_percent
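

# A minimal pure-Python sketch of linear warm-up (hypothetical helper); the ratio
# saturates at 1.0 once `current_step` reaches `warmup_steps`.
def _warmup_ref(learning_rate, warmup_steps, current_step):
    # With the example values: 0.1 * min(2, 2) / 2 == 0.1
    return learning_rate * min(current_step, warmup_steps) / warmup_steps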


__all__ = [
    'ExponentialDecayLR',
    'NaturalExpDecayLR',
    'InverseDecayLR',
    'CosineDecayLR',
    'PolynomialDecayLR',
    'WarmUpLR'
]