optimizer.py

import sys
import math
from paddle.optimizer.lr import LinearWarmup
from paddle.optimizer.lr import PiecewiseDecay
from paddle.optimizer.lr import CosineAnnealingDecay
from paddle.optimizer.lr import ExponentialDecay
import paddle
import paddle.regularizer as regularizer
from copy import deepcopy

class Cosine(CosineAnnealingDecay):
    """
    Cosine learning rate decay.
    lr = 0.5 * lr * (math.cos(epoch * (math.pi / epochs)) + 1)

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
    """

    def __init__(self, lr, step_each_epoch, epochs, **kwargs):
        super(Cosine, self).__init__(
            learning_rate=lr,
            T_max=step_each_epoch * epochs)

        self.update_specified = False
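
# Note (added remark, not from the original file): T_max is given in iterations
# (step_each_epoch * epochs), so this scheduler is intended to be stepped once
# per batch rather than once per epoch; the docstring formula is written in
# epochs only for readability.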

class Piecewise(PiecewiseDecay):
    """
    Piecewise learning rate decay.

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        decay_epochs(list): piecewise decay epochs
        gamma(float): decay factor
    """

    def __init__(self, lr, step_each_epoch, decay_epochs, gamma=0.1, **kwargs):
        boundaries = [step_each_epoch * e for e in decay_epochs]
        lr_values = [lr * (gamma**i) for i in range(len(boundaries) + 1)]
        super(Piecewise, self).__init__(boundaries=boundaries, values=lr_values)

        self.update_specified = False

class CosineWarmup(LinearWarmup):
    """
    Cosine learning rate decay with warmup.
    [0, warmup_epoch): linear warmup
    [warmup_epoch, epochs): cosine decay

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        warmup_epoch(int): epoch num of warmup
    """

    def __init__(self, lr, step_each_epoch, epochs, warmup_epoch=5, **kwargs):
        assert epochs > warmup_epoch, \
            "total epoch({}) should be larger than warmup_epoch({}) in CosineWarmup.".format(
                epochs, warmup_epoch)
        warmup_step = warmup_epoch * step_each_epoch
        start_lr = 0.0
        end_lr = lr
        lr_sch = Cosine(lr, step_each_epoch, epochs - warmup_epoch)

        super(CosineWarmup, self).__init__(
            learning_rate=lr_sch,
            warmup_steps=warmup_step,
            start_lr=start_lr,
            end_lr=end_lr)

        self.update_specified = False
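
# Hedged usage sketch (illustrative, not part of the original file): the wrapped
# schedule is stepped once per iteration; the first warmup_epoch * step_each_epoch
# steps ramp linearly from 0 to lr, after which the cosine decay takes over.
#
#   lr_sch = CosineWarmup(lr=0.1, step_each_epoch=100, epochs=120, warmup_epoch=5)
#   for _ in range(100 * 120):
#       # ... train one batch with the current lr ...
#       lr_sch.step()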

class ExponentialWarmup(LinearWarmup):
    """
    Exponential learning rate decay with warmup.
    [0, warmup_epoch): linear warmup
    [warmup_epoch, epochs): exponential decay

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        decay_epochs(float): decay epochs
        decay_rate(float): decay rate
        warmup_epoch(int): epoch num of warmup
    """

    def __init__(self,
                 lr,
                 step_each_epoch,
                 decay_epochs=2.4,
                 decay_rate=0.97,
                 warmup_epoch=5,
                 **kwargs):
        warmup_step = warmup_epoch * step_each_epoch
        start_lr = 0.0
        end_lr = lr
        lr_sch = ExponentialDecay(lr, decay_rate)

        super(ExponentialWarmup, self).__init__(
            learning_rate=lr_sch,
            warmup_steps=warmup_step,
            start_lr=start_lr,
            end_lr=end_lr)

        # NOTE: hack method to update the exponential lr scheduler
        self.update_specified = True
        self.update_start_step = warmup_step
        self.update_step_interval = int(decay_epochs * step_each_epoch)
        self.step_each_epoch = step_each_epoch
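
# Interpretation of the "hack" flags above (added remark, not from the original
# file): the training loop that consumes this scheduler is presumably expected
# to step it only after update_start_step and then once every
# update_step_interval iterations, so the wrapped ExponentialDecay advances once
# per `decay_epochs` epochs instead of once per batch. A hedged sketch:
#
#   if getattr(lr_sch, 'update_specified', False):
#       if (global_step >= lr_sch.update_start_step
#               and (global_step - lr_sch.update_start_step)
#               % lr_sch.update_step_interval == 0):
#           lr_sch.step()
#   else:
#       lr_sch.step()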

class LearningRateBuilder():
    """
    Build learning rate variable.
    https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn.html

    Args:
        function(str): class name of learning rate
        params(dict): parameters used for init the class
    """

    def __init__(self,
                 function='Linear',
                 params={'lr': 0.1,
                         'steps': 100,
                         'end_lr': 0.0}):
        self.function = function
        self.params = params

    def __call__(self):
        mod = sys.modules[__name__]
        lr = getattr(mod, self.function)(**self.params)
        return lr
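
# Hedged usage sketch (illustrative, not part of the original file): the builder
# looks up one of the scheduler classes defined above by name in this module,
# e.g.
#
#   builder = LearningRateBuilder(
#       function='CosineWarmup',
#       params={'lr': 0.1, 'step_each_epoch': 100, 'epochs': 120,
#               'warmup_epoch': 5})
#   lr_sch = builder()  # -> CosineWarmup instance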

class L1Decay(object):
    """
    L1 Weight Decay Regularization, which encourages the weights to be sparse.

    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L1Decay, self).__init__()
        self.factor = factor

    def __call__(self):
        reg = regularizer.L1Decay(self.factor)
        return reg

class L2Decay(object):
    """
    L2 Weight Decay Regularization, which encourages the weights to be small.

    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L2Decay, self).__init__()
        self.factor = factor

    def __call__(self):
        reg = regularizer.L2Decay(self.factor)
        return reg

class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.

    Args:
        learning_rate (float|Variable): The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float): Momentum factor.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 parameter_list=None,
                 regularization=None,
                 **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.parameter_list = parameter_list
        self.regularization = regularization

    def __call__(self):
        opt = paddle.optimizer.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            parameters=self.parameter_list,
            weight_decay=self.regularization)
        return opt

class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.

    Args:
        learning_rate (float|Variable): The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float): Momentum factor.
        rho (float): rho value in the update equation.
        epsilon (float): avoids division by zero, default is 1e-6.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 rho=0.95,
                 epsilon=1e-6,
                 parameter_list=None,
                 regularization=None,
                 **args):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.regularization = regularization

    def __call__(self):
        opt = paddle.optimizer.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            parameters=self.parameter_list,
            weight_decay=self.regularization)
        return opt

class OptimizerBuilder(object):
    """
    Build optimizer.

    Args:
        function(str): class name of the optimizer
        params(dict): parameters used for init the class
        regularizer(dict): parameters used to create the regularizer
    """

    def __init__(self,
                 function='Momentum',
                 params={'momentum': 0.9},
                 regularizer=None):
        self.function = function
        self.params = params
        # create regularizer
        if regularizer is not None:
            mod = sys.modules[__name__]
            reg_func = regularizer['function'] + 'Decay'
            del regularizer['function']
            reg = getattr(mod, reg_func)(**regularizer)()
            self.params['regularization'] = reg

    def __call__(self, learning_rate, parameter_list=None):
        mod = sys.modules[__name__]
        opt = getattr(mod, self.function)
        return opt(learning_rate=learning_rate,
                   parameter_list=parameter_list,
                   **self.params)()
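
# Hedged usage sketch (illustrative, not part of the original file); `model` is
# assumed to be an existing paddle.nn.Layer:
#
#   opt_builder = OptimizerBuilder(
#       function='Momentum',
#       params={'momentum': 0.9},
#       regularizer={'function': 'L2', 'factor': 0.0001})
#   optimizer = opt_builder(learning_rate=0.1,
#                           parameter_list=model.parameters())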

def create_optimizer(config, parameter_list=None):
    """
    Create an optimizer using config, usually including
    learning rate and regularization.

    Args:
        config(dict): such as
            {
                'LEARNING_RATE':
                    {'function': 'Cosine',
                     'params': {'lr': 0.1}
                    },
                'OPTIMIZER':
                    {'function': 'Momentum',
                     'params': {'momentum': 0.9},
                     'regularizer':
                        {'function': 'L2', 'factor': 0.0001}
                    }
            }

    Returns:
        an optimizer instance and the corresponding learning rate scheduler
    """
    # create learning_rate instance
    lr_config = config['LEARNING_RATE']
    lr_config['params'].update({
        'epochs': config['epoch'],
        'step_each_epoch':
        config['total_images'] // config['TRAIN']['batch_size'],
    })
    lr = LearningRateBuilder(**lr_config)()

    # create optimizer instance
    opt_config = deepcopy(config['OPTIMIZER'])
    opt = OptimizerBuilder(**opt_config)
    return opt(lr, parameter_list), lr

def create_multi_optimizer(config, parameter_list=None):
    """
    Create an optimizer and its learning rate scheduler from config,
    following the same procedure as create_optimizer.
    """
    # create learning_rate instance
    lr_config = config['LEARNING_RATE']
    lr_config['params'].update({
        'epochs': config['epoch'],
        'step_each_epoch':
        config['total_images'] // config['TRAIN']['batch_size'],
    })
    lr = LearningRateBuilder(**lr_config)()

    # create optimizer instance
    opt_config = deepcopy(config['OPTIMIZER'])
    opt = OptimizerBuilder(**opt_config)
    return opt(lr, parameter_list), lr
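

# --- Hedged end-to-end sketch, not part of the original module -----------------
# A minimal example of create_optimizer(), assuming a config laid out like the
# one documented in its docstring. The config values and the paddle.nn.Linear
# model below are illustrative assumptions only.
if __name__ == "__main__":
    demo_config = {
        'epoch': 10,
        'total_images': 1280,
        'TRAIN': {'batch_size': 128},
        'LEARNING_RATE': {
            'function': 'Cosine',
            'params': {'lr': 0.1},
        },
        'OPTIMIZER': {
            'function': 'Momentum',
            'params': {'momentum': 0.9},
            'regularizer': {'function': 'L2', 'factor': 0.0001},
        },
    }
    # tiny stand-in model so the optimizer has parameters to manage
    model = paddle.nn.Linear(16, 2)
    optimizer, lr_scheduler = create_optimizer(demo_config, model.parameters())
    print(type(optimizer).__name__, optimizer.get_lr())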