# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))

from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule

class DSConv(nn.Layer):
    """Depthwise separable convolution with a 4x pointwise expansion and an
    optional 1x1 shortcut; used as the lightweight conv in LKPAN's 'lite' mode."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding,
                 stride=1,
                 groups=None,
                 if_act=True,
                 act="relu",
                 **kwargs):
        super(DSConv, self).__init__()
        if groups is None:
            groups = in_channels
        self.if_act = if_act
        self.act = act

        # depthwise convolution
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias_attr=False)
        self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)

        # pointwise expansion (4x channels)
        self.conv2 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=int(in_channels * 4),
            kernel_size=1,
            stride=1,
            bias_attr=False)
        self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)

        # pointwise projection back to out_channels
        self.conv3 = nn.Conv2D(
            in_channels=int(in_channels * 4),
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            bias_attr=False)

        self._c = [in_channels, out_channels]
        if in_channels != out_channels:
            # 1x1 conv on the shortcut when the channel count changes
            self.conv_end = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                bias_attr=False)

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                print("The activation function({}) is selected incorrectly.".
                      format(self.act))
                exit()

        x = self.conv3(x)
        if self._c[0] != self._c[1]:
            x = x + self.conv_end(inputs)
        return x

class DBFPN(nn.Layer):
    """FPN neck of DB/DB++ text detection: 1x1 lateral convs, top-down fusion,
    and a 4-scale concat at 1/4 resolution (optionally re-weighted by ASF)."""

    def __init__(self, in_channels, out_channels, use_asf=False, **kwargs):
        super(DBFPN, self).__init__()
        self.out_channels = out_channels
        self.use_asf = use_asf
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.in2_conv = nn.Conv2D(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

        if self.use_asf is True:
            self.asf = ASFBlock(self.out_channels, self.out_channels // 4)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)
        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)

        if self.use_asf is True:
            fuse = self.asf(fuse, [p5, p4, p3, p2])

        return fuse

class RSELayer(nn.Layer):
    """Convolution followed by a squeeze-and-excitation block, with an
    optional residual shortcut around the SE branch."""

    def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
        super(RSELayer, self).__init__()
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.out_channels = out_channels
        self.in_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=self.out_channels,
            kernel_size=kernel_size,
            padding=int(kernel_size // 2),
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.se_block = SEModule(self.out_channels)
        self.shortcut = shortcut

    def forward(self, ins):
        x = self.in_conv(ins)
        if self.shortcut:
            out = x + self.se_block(x)
        else:
            out = self.se_block(x)
        return out

class RSEFPN(nn.Layer):
    """FPN neck in which the lateral (1x1) and output (3x3) convs are
    RSELayer blocks."""

    def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
        super(RSEFPN, self).__init__()
        self.out_channels = out_channels
        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()

        for i in range(len(in_channels)):
            self.ins_conv.append(
                RSELayer(
                    in_channels[i],
                    out_channels,
                    kernel_size=1,
                    shortcut=shortcut))
            self.inp_conv.append(
                RSELayer(
                    out_channels,
                    out_channels // 4,
                    kernel_size=3,
                    shortcut=shortcut))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.inp_conv[3](in5)
        p4 = self.inp_conv[2](out4)
        p3 = self.inp_conv[1](out3)
        p2 = self.inp_conv[0](out2)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse

class LKPAN(nn.Layer):
    """Large-kernel PAN neck: an FPN top-down pass followed by a bottom-up
    path-aggregation pass with 9x9 convs; 'lite' mode swaps the 9x9 convs
    for DSConv."""

    def __init__(self, in_channels, out_channels, mode='large', **kwargs):
        super(LKPAN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()
        # pan head
        self.pan_head_conv = nn.LayerList()
        self.pan_lat_conv = nn.LayerList()

        if mode.lower() == 'lite':
            p_layer = DSConv
        elif mode.lower() == 'large':
            p_layer = nn.Conv2D
        else:
            raise ValueError(
                "mode can only be one of ['lite', 'large'], but received {}".
                format(mode))

        for i in range(len(in_channels)):
            self.ins_conv.append(
                nn.Conv2D(
                    in_channels=in_channels[i],
                    out_channels=self.out_channels,
                    kernel_size=1,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))
            self.inp_conv.append(
                p_layer(
                    in_channels=self.out_channels,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            if i > 0:
                self.pan_head_conv.append(
                    nn.Conv2D(
                        in_channels=self.out_channels // 4,
                        out_channels=self.out_channels // 4,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        weight_attr=ParamAttr(initializer=weight_attr),
                        bias_attr=False))
            self.pan_lat_conv.append(
                p_layer(
                    in_channels=self.out_channels // 4,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        f5 = self.inp_conv[3](in5)
        f4 = self.inp_conv[2](out4)
        f3 = self.inp_conv[1](out3)
        f2 = self.inp_conv[0](out2)

        # bottom-up path aggregation
        pan3 = f3 + self.pan_head_conv[0](f2)
        pan4 = f4 + self.pan_head_conv[1](pan3)
        pan5 = f5 + self.pan_head_conv[2](pan4)

        p2 = self.pan_lat_conv[0](f2)
        p3 = self.pan_lat_conv[1](pan3)
        p4 = self.pan_lat_conv[2](pan4)
        p5 = self.pan_lat_conv[3](pan5)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse

class ASFBlock(nn.Layer):
    """
    This code is adapted from:
    https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py
    """

    def __init__(self, in_channels, inter_channels, out_features_num=4):
        """
        Adaptive Scale Fusion (ASF) block of DBNet++
        Args:
            in_channels: the number of channels in the input data
            inter_channels: the number of middle channels
            out_features_num: the number of fused stages
        """
        super(ASFBlock, self).__init__()
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.in_channels = in_channels
        self.inter_channels = inter_channels
        self.out_features_num = out_features_num
        self.conv = nn.Conv2D(in_channels, inter_channels, 3, padding=1)

        self.spatial_scale = nn.Sequential(
            # Nx1xHxW
            nn.Conv2D(
                in_channels=1,
                out_channels=1,
                kernel_size=3,
                bias_attr=False,
                padding=1,
                weight_attr=ParamAttr(initializer=weight_attr)),
            nn.ReLU(),
            nn.Conv2D(
                in_channels=1,
                out_channels=1,
                kernel_size=1,
                bias_attr=False,
                weight_attr=ParamAttr(initializer=weight_attr)),
            nn.Sigmoid())

        self.channel_scale = nn.Sequential(
            nn.Conv2D(
                in_channels=inter_channels,
                out_channels=out_features_num,
                kernel_size=1,
                bias_attr=False,
                weight_attr=ParamAttr(initializer=weight_attr)),
            nn.Sigmoid())

    def forward(self, fuse_features, features_list):
        fuse_features = self.conv(fuse_features)
        spatial_x = paddle.mean(fuse_features, axis=1, keepdim=True)
        attention_scores = self.spatial_scale(spatial_x) + fuse_features
        attention_scores = self.channel_scale(attention_scores)
        assert len(features_list) == self.out_features_num

        out_list = []
        for i in range(self.out_features_num):
            # one attention channel re-weights one pyramid stage
            out_list.append(attention_scores[:, i:i + 1] * features_list[i])
        return paddle.concat(out_list, axis=1)
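

if __name__ == "__main__":
    # Illustrative smoke test, not part of the original module: it shows that
    # each neck fuses the c2..c5 pyramid back to a single map at 1/4 input
    # resolution. The channel list [16, 24, 56, 480], the 640x640 input size,
    # and the out_channels values below are assumptions for this sketch; in
    # real training they come from the backbone and the YAML config.
    feats = [
        paddle.rand([1, c, 640 // s, 640 // s])
        for c, s in zip([16, 24, 56, 480], [4, 8, 16, 32])
    ]  # simulated c2..c5 feature maps at strides 4/8/16/32
    for neck in (DBFPN(in_channels=[16, 24, 56, 480], out_channels=256),
                 RSEFPN(in_channels=[16, 24, 56, 480], out_channels=96),
                 LKPAN(in_channels=[16, 24, 56, 480], out_channels=256)):
        out = neck(feats)
        # expected spatial size: 160x160 (1/4 of 640); channel count equals
        # the neck's out_channels, i.e. 4 * (out_channels // 4)
        print(type(neck).__name__, out.shape)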