rec_micronet.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py
https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn

from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible

M0_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M1_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M2_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
    [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
    [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
]
M3_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
    [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
    [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
    [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
]


def get_micronet_config(mode):
    return eval(mode + '_cfgs')
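
# Note: get_micronet_config resolves the mode string against the
# module-level tables above, e.g. get_micronet_config('M3') returns
# M3_cfgs; each row configures one DYMicroBlock via the fifteen fields
# named in the header comments.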


class MaxGroupPooling(nn.Layer):
    def __init__(self, channel_per_group=2):
        super(MaxGroupPooling, self).__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        if self.channel_per_group == 1:
            return x
        # max op
        b, c, h, w = x.shape

        # reshape
        y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w])
        out = paddle.max(y, axis=2)
        return out
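
# Shape walkthrough (illustrative): with channel_per_group=2 and an input
# of shape [N, 8, H, W], the reshape yields [N, 4, 2, H, W] and the max
# over axis=2 keeps the stronger response of each channel pair, giving an
# output of shape [N, 4, H, W].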


class SpatialSepConvSF(nn.Layer):
    def __init__(self, inp, oups, kernel_size, stride):
        super(SpatialSepConvSF, self).__init__()

        oup1, oup2 = oups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0),
                bias_attr=False,
                groups=1),
            nn.BatchNorm2D(oup1),
            nn.Conv2D(
                oup1,
                oup1 * oup2, (1, kernel_size), (1, stride),
                (0, kernel_size // 2),
                bias_attr=False,
                groups=oup1),
            nn.BatchNorm2D(oup1 * oup2),
            ChannelShuffle(oup1), )

    def forward(self, x):
        out = self.conv(x)
        return out
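
# SpatialSepConvSF factorizes the k x k stem convolution spatially: a
# (k, 1) conv produces oup1 channels, then a grouped (1, k) conv expands
# each of them by oup2, so the block emits oup1 * oup2 channels before
# the ChannelShuffle re-interleaves the groups.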


class ChannelShuffle(nn.Layer):
    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        b, c, h, w = x.shape
        channels_per_group = c // self.groups

        # reshape
        x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w])
        x = paddle.transpose(x, (0, 2, 1, 3, 4))
        out = paddle.reshape(x, [b, -1, h, w])
        return out
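
# Illustrative example: with groups=2 and six channels [0, 1, 2, 3, 4, 5],
# the reshape/transpose/reshape round-trip interleaves the two halves
# into [0, 3, 1, 4, 2, 5], letting information cross the group boundaries
# of the surrounding grouped convolutions.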


class StemLayer(nn.Layer):
    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super(StemLayer, self).__init__()

        g1, g2 = groups
        self.stem = nn.Sequential(
            SpatialSepConvSF(inp, groups, 3, stride),
            MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6())

    def forward(self, x):
        out = self.stem(x)
        return out


class DepthSpatialSepConv(nn.Layer):
    def __init__(self, inp, expand, kernel_size, stride):
        super(DepthSpatialSepConv, self).__init__()

        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2

        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                inp * exp1, (kernel_size, 1), (stride, 1),
                (kernel_size // 2, 0),
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(inp * exp1),
            nn.Conv2D(
                hidden_dim,
                oup, (1, kernel_size),
                1, (0, kernel_size // 2),
                bias_attr=False,
                groups=hidden_dim),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x
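
# DepthSpatialSepConv is the depthwise analogue: a depthwise (k, 1) conv
# expands inp to inp * exp1 channels (downsampling only the height when
# stride > 1), then a depthwise (1, k) conv expands to inp * exp1 * exp2;
# no pointwise conv is involved, so channel mixing is deferred to
# GroupConv and ChannelShuffle.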


class GroupConv(nn.Layer):
    # note: the upstream default was groups=2 (an int), which would break
    # the self.groups[0] lookup below; a (2, 2) tuple preserves the intent
    def __init__(self, inp, oup, groups=(2, 2)):
        super(GroupConv, self).__init__()
        self.inp = inp
        self.oup = oup
        self.groups = groups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x


class DepthConv(nn.Layer):
    def __init__(self, inp, oup, kernel_size, stride):
        super(DepthConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup,
                kernel_size,
                stride,
                kernel_size // 2,
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        out = self.conv(x)
        return out


class DYShiftMax(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=[0.0, 0.0],
                 init_b=[0.0, 0.0],
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super(DYShiftMax, self).__init__()
        self.oup = oup
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(nn.ReLU() if relu_before_pool else
                                      nn.Sequential(), nn.AdaptiveAvgPool2D(1))
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine squeeze
        squeeze = make_divisible(inp // reduction, 4)
        if squeeze < 4:
            squeeze = 4

        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        if g is None:
            # fixed: the upstream fallback was the int 1, which would break
            # the g[1] indexing below; use a (1, 1) tuple instead
            g = (1, 1)
        self.g = g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g
        self.gc = inp // self.g

        index = paddle.arange(inp)
        index = paddle.reshape(index, [1, inp, 1, 1])
        index = paddle.reshape(index, [1, self.g, self.gc, 1, 1])
        indexgs = paddle.split(index, [1, self.g - 1], axis=1)
        indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
        indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2)
        indexs = paddle.concat((indexs[1], indexs[0]), axis=2)
        self.index = paddle.reshape(indexs, [inp])
        self.expansion = expansion
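
    # The precomputed self.index rotates the group order by one (and the
    # within-group channel order by one), so forward() can pair every
    # channel with a shifted counterpart when forming x2 below.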

    def forward(self, x):
        x_in = x
        x_out = x

        b, c, _, _ = x_in.shape
        y = self.avg_pool(x_in)
        y = paddle.reshape(y, [b, c])
        y = self.fc(y)
        y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
        y = (y - 0.5) * self.act_max

        # gather the shifted channels without the upstream numpy
        # round-trip, so gradients keep flowing through x2
        x2 = paddle.index_select(x_out, self.index, axis=1)

        if self.exp == 4:
            temp = y.shape
            a1, b1, a2, b2 = paddle.split(y, temp[1] // self.oup, axis=1)

            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]

            z1 = x_out * a1 + x2 * b1
            z2 = x_out * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        elif self.exp == 2:
            temp = y.shape
            a1, b1 = paddle.split(y, temp[1] // self.oup, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x_out * a1 + x2 * b1

        return out


class DYMicroBlock(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=4,
                 groups_1x1=(1, 1),
                 depthsep=True,
                 shuffle=False,
                 activation_cfg=None):
        super(DYMicroBlock, self).__init__()

        self.identity = stride == 1 and inp == oup

        y1, y2, y3 = activation_cfg['dy']
        act_reduction = 8 * activation_cfg['ratio']
        init_a = activation_cfg['init_a']
        init_b = activation_cfg['init_b']

        t1 = ch_exp
        gs1 = ch_per_group
        hidden_fft, g1, g2 = groups_1x1
        hidden_dim2 = inp * t1[0] * t1[1]

        if gs1[0] == 0:
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                ChannelShuffle(hidden_dim2 // 2)
                if shuffle and y2 != 0 else nn.Sequential(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), )
        elif g2 == 0:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction,
                    init_b=[0.0, 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Sequential(), )
        else:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y1 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
                if depthsep else
                DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
                nn.Sequential(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(hidden_dim2 // 4)
                if shuffle and y1 != 0 and y2 != 0 else nn.Sequential()
                if y1 == 0 and y2 == 0 else ChannelShuffle(hidden_dim2 // 2),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and y3 != 0 else nn.Sequential(), )

    def forward(self, x):
        identity = x
        out = self.layers(x)
        if self.identity:
            out = out + identity
        return out
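
# The residual connection is only taken when stride == 1 and inp == oup
# (self.identity), following the usual inverted-residual convention of
# MobileNet-style blocks.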


class MicroNet(nn.Layer):
    """
    The MicroNet backbone network for the recognition module.
    Args:
        mode (str): one of {'M0', 'M1', 'M2', 'M3'}. The four variants target
            four different computational budgets (4M, 6M, 12M and 21M MAdds).
            Default: 'M3'.
    """

    def __init__(self, mode='M3', **kwargs):
        super(MicroNet, self).__init__()

        self.cfgs = get_micronet_config(mode)

        activation_cfg = {}
        if mode == 'M0':
            input_channel = 4
            stem_groups = 2, 2
            out_ch = 384
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M1':
            input_channel = 6
            stem_groups = 3, 2
            out_ch = 576
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M2':
            input_channel = 8
            stem_groups = 4, 2
            out_ch = 768
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M3':
            input_channel = 12
            stem_groups = 4, 3
            out_ch = 432
            activation_cfg['init_a'] = 1.0, 0.5
            activation_cfg['init_b'] = 0.0, 0.5
        else:
            raise NotImplementedError(
                "mode [" + mode + "] is not implemented!")

        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]

        for idx, val in enumerate(self.cfgs):
            s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val

            t1 = (c1, c2)
            gs1 = (g1, g2)
            gs2 = (c3, g3, g4)
            activation_cfg['dy'] = [y1, y2, y3]
            activation_cfg['ratio'] = r

            output_channel = c
            layers.append(
                DYMicroBlock(
                    input_channel,
                    output_channel,
                    kernel_size=ks,
                    stride=s,
                    ch_exp=t1,
                    ch_per_group=gs1,
                    groups_1x1=gs2,
                    depthsep=True,
                    shuffle=True,
                    activation_cfg=activation_cfg, ))
            input_channel = output_channel
            for i in range(1, n):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        output_channel,
                        kernel_size=ks,
                        stride=1,
                        ch_exp=t1,
                        ch_per_group=gs1,
                        groups_1x1=gs2,
                        depthsep=True,
                        shuffle=True,
                        activation_cfg=activation_cfg, ))
                input_channel = output_channel

        self.features = nn.Sequential(*layers)
        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(out_ch)

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        return x
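

# A minimal smoke-test sketch (an addition, not part of the upstream file).
# It assumes a PaddleOCR checkout where the ppocr package is importable, and
# feeds a text-recognition-shaped input through the backbone; note that the
# recognition variant downsamples the height more aggressively than the width.
if __name__ == '__main__':
    model = MicroNet(mode='M3')
    x = paddle.rand([1, 3, 32, 320])
    feats = model(x)
    # prints the pooled feature map shape and the feature depth the
    # downstream neck/head should expect (model.out_channels)
    print(feats.shape, model.out_channels)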