learning_rate.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from paddle.optimizer import lr
from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay


class Linear(object):
    """
    Linear learning rate decay
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        epochs (int): Total training epochs; multiplied by step_each_epoch to obtain the number of decay steps.
        step_each_epoch (int): Number of steps in each epoch.
        end_lr (float, optional): The minimum final learning rate. Default: 0.0.
        power (float, optional): Power of polynomial. Default: 1.0.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Linear, self).__init__()
        self.learning_rate = learning_rate
        self.epochs = epochs * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.epochs,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
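
# Usage sketch for Linear (illustrative numbers, not from any shipped config):
# with epochs=10, step_each_epoch=100 and warmup_epoch=1, calling the builder
# returns a PolynomialDecay over 10 * 100 = 1000 steps, wrapped in a 100-step
# LinearWarmup that ramps the lr from 0.0 up to the base value.
#
#   sched = Linear(learning_rate=0.001, epochs=10, step_each_epoch=100,
#                  warmup_epoch=1)()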


class Cosine(object):
    """
    Cosine learning rate decay
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        learning_rate (float): Initial learning rate.
        step_each_epoch (int): Number of steps in each epoch.
        epochs (int): Total training epochs.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Cosine, self).__init__()
        self.learning_rate = learning_rate
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.CosineAnnealingDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
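
# Rough shape of the schedule above, following the cosine-annealing formula
# documented for paddle.optimizer.lr.CosineAnnealingDecay (eta_min defaults
# to 0, so the floor here is 0):
#
#   lr(t) = 0.5 * base_lr * (1 + cos(pi * t / T_max))
#
# e.g. with base_lr=0.001: lr(0) = 0.001, lr(T_max/2) = 0.0005, lr(T_max) ~ 0.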


class Step(object):
    """
    Step learning rate decay
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): The interval, in epochs, at which the learning rate is decayed.
        step_each_epoch (int): Number of steps in each epoch.
        gamma (float): The ratio by which the learning rate is reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_size,
                 step_each_epoch,
                 gamma,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Step, self).__init__()
        self.step_size = step_each_epoch * step_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.StepDecay(
            learning_rate=self.learning_rate,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Piecewise(object):
    """
    Piecewise learning rate decay
    Args:
        step_each_epoch (int): Number of steps in each epoch.
        decay_epochs (list): Epoch indices at which the learning rate decays; they are converted to step boundaries internally. The type of element in the list is python int.
        values (list): Learning rate values for the intervals defined by the boundaries; it must contain one more element than decay_epochs. The type of element in the list is python float.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Piecewise, self).__init__()
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.PiecewiseDecay(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate
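
# Usage sketch for Piecewise (illustrative numbers): decay_epochs are turned
# into step boundaries by multiplying with step_each_epoch, and values needs
# one more entry than decay_epochs (one learning rate per interval).
#
#   sched = Piecewise(step_each_epoch=100,
#                     decay_epochs=[10, 20],
#                     values=[0.1, 0.01, 0.001])()   # boundaries: [1000, 2000]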


class CyclicalCosine(object):
    """
    Cyclical cosine learning rate decay
    Args:
        learning_rate (float): Initial learning rate.
        step_each_epoch (int): Number of steps in each epoch.
        epochs (int): Total training epochs.
        cycle (int): Period of the cosine learning rate, in epochs.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 cycle,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(CyclicalCosine, self).__init__()
        self.learning_rate = learning_rate
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
        self.cycle = round(cycle * step_each_epoch)

    def __call__(self):
        learning_rate = CyclicalCosineDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            cycle=self.cycle,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class OneCycle(object):
    """
    One Cycle learning rate decay
    Args:
        max_lr (float): Upper learning rate boundary.
        epochs (int): Total training epochs.
        step_each_epoch (int): Number of steps in each epoch.
        anneal_strategy (str): {'cos', 'linear'} Specifies the annealing strategy: "cos" for cosine annealing, "linear" for linear annealing.
            Default: 'cos'
        three_phase (bool): If True, use a third phase of the schedule to annihilate the learning rate according to 'final_div_factor'
            instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by 'pct_start').
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 max_lr,
                 epochs,
                 step_each_epoch,
                 anneal_strategy='cos',
                 three_phase=False,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(OneCycle, self).__init__()
        self.max_lr = max_lr
        self.epochs = epochs
        self.steps_per_epoch = step_each_epoch
        self.anneal_strategy = anneal_strategy
        self.three_phase = three_phase
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = OneCycleDecay(
            max_lr=self.max_lr,
            epochs=self.epochs,
            steps_per_epoch=self.steps_per_epoch,
            anneal_strategy=self.anneal_strategy,
            three_phase=self.three_phase,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.max_lr,
                last_epoch=self.last_epoch)
        return learning_rate
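
# Usage sketch for OneCycle (illustrative numbers). OneCycleDecay is this
# project's own implementation imported from .lr_scheduler, so the exact
# curve depends on that class; the wrapper above only forwards the arguments
# and optionally adds linear warmup.
#
#   sched = OneCycle(max_lr=0.01, epochs=50, step_each_epoch=200,
#                    anneal_strategy='cos', three_phase=False)()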


class Const(object):
    """
    Constant learning rate, optionally wrapped with linear warmup
    Args:
        learning_rate (float): Initial learning rate.
        step_each_epoch (int): Number of steps in each epoch.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Const, self).__init__()
        self.learning_rate = learning_rate
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = self.learning_rate
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class DecayLearningRate(object):
    """
    DecayLearningRate: polynomial learning rate decay
    new_lr = (lr - end_lr) * (1 - epoch/decay_steps)**power + end_lr
    Args:
        learning_rate (float): Initial learning rate.
        step_each_epoch (int): Number of steps in each epoch.
        epochs (int): Total training epochs.
        factor (float): Power of polynomial; should be greater than 0.0 to get learning rate decay. Default: 0.9.
        end_lr (float): The minimum final learning rate. Default: 0.0.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 factor=0.9,
                 end_lr=0,
                 **kwargs):
        super(DecayLearningRate, self).__init__()
        self.learning_rate = learning_rate
        self.epochs = epochs + 1
        self.factor = factor
        self.end_lr = end_lr
        self.decay_steps = step_each_epoch * epochs

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.decay_steps,
            power=self.factor,
            end_lr=self.end_lr)
        return learning_rate
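
# Worked example of the polynomial formula in the docstring (illustrative
# numbers): with learning_rate=0.01, factor=0.9, end_lr=0 and
# decay_steps=1000, the lr at step 500 is roughly
# 0.01 * (1 - 500/1000) ** 0.9 ~= 0.0054.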


class MultiStepDecay(object):
    """
    Multi-step learning rate decay
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        milestones (list): Epoch indices at which the learning rate is decayed; they are converted to step indices internally.
        step_each_epoch (int): Number of steps in each epoch.
        gamma (float): The ratio by which the learning rate is reduced at each milestone. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 milestones,
                 step_each_epoch,
                 gamma,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(MultiStepDecay, self).__init__()
        self.milestones = [step_each_epoch * e for e in milestones]
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = lr.MultiStepDecay(
            learning_rate=self.learning_rate,
            milestones=self.milestones,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class TwoStepCosine(object):
    """
    Two-step cosine learning rate decay
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        learning_rate (float): Initial learning rate.
        step_each_epoch (int): Number of steps in each epoch.
        epochs (int): Total training epochs.
        warmup_epoch (int, optional): Number of epochs for linear warmup. Default: 0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(TwoStepCosine, self).__init__()
        self.learning_rate = learning_rate
        # The first cosine stage is fixed to span the first 200 epochs.
        self.T_max1 = step_each_epoch * 200
        self.T_max2 = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

    def __call__(self):
        learning_rate = TwoStepCosineDecay(
            learning_rate=self.learning_rate,
            T_max1=self.T_max1,
            T_max2=self.T_max2,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
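
# End-to-end sketch of how one of these builders is typically consumed
# (names and values below are assumptions for illustration, not part of this
# module): the builder is called once to get a paddle LRScheduler, which is
# handed to an optimizer and stepped every training iteration.
#
#   import paddle
#
#   sched = Cosine(learning_rate=0.001, step_each_epoch=100, epochs=10,
#                  warmup_epoch=1)()
#   opt = paddle.optimizer.Adam(learning_rate=sched,
#                               parameters=model.parameters())
#   for step in range(total_steps):
#       ...train one batch with opt...
#       sched.step()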