# csp_pan.py
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/PaddlePaddle/PaddleDetection/blob/release%2F2.3/ppdet/modeling/necks/csp_pan.py
  16. import paddle
  17. import paddle.nn as nn
  18. import paddle.nn.functional as F
  19. from paddle import ParamAttr
# Names exposed by a star-import of this module: only the CSPPAN layer.
__all__ = ['CSPPAN']
  21. class ConvBNLayer(nn.Layer):
  22. def __init__(self,
  23. in_channel=96,
  24. out_channel=96,
  25. kernel_size=3,
  26. stride=1,
  27. groups=1,
  28. act='leaky_relu'):
  29. super(ConvBNLayer, self).__init__()
  30. initializer = nn.initializer.KaimingUniform()
  31. self.act = act
  32. assert self.act in ['leaky_relu', "hard_swish"]
  33. self.conv = nn.Conv2D(
  34. in_channels=in_channel,
  35. out_channels=out_channel,
  36. kernel_size=kernel_size,
  37. groups=groups,
  38. padding=(kernel_size - 1) // 2,
  39. stride=stride,
  40. weight_attr=ParamAttr(initializer=initializer),
  41. bias_attr=False)
  42. self.bn = nn.BatchNorm2D(out_channel)
  43. def forward(self, x):
  44. x = self.bn(self.conv(x))
  45. if self.act == "leaky_relu":
  46. x = F.leaky_relu(x)
  47. elif self.act == "hard_swish":
  48. x = F.hardswish(x)
  49. return x
  50. class DPModule(nn.Layer):
  51. """
  52. Depth-wise and point-wise module.
  53. Args:
  54. in_channel (int): The input channels of this Module.
  55. out_channel (int): The output channels of this Module.
  56. kernel_size (int): The conv2d kernel size of this Module.
  57. stride (int): The conv2d's stride of this Module.
  58. act (str): The activation function of this Module,
  59. Now support `leaky_relu` and `hard_swish`.
  60. """
  61. def __init__(self,
  62. in_channel=96,
  63. out_channel=96,
  64. kernel_size=3,
  65. stride=1,
  66. act='leaky_relu'):
  67. super(DPModule, self).__init__()
  68. initializer = nn.initializer.KaimingUniform()
  69. self.act = act
  70. self.dwconv = nn.Conv2D(
  71. in_channels=in_channel,
  72. out_channels=out_channel,
  73. kernel_size=kernel_size,
  74. groups=out_channel,
  75. padding=(kernel_size - 1) // 2,
  76. stride=stride,
  77. weight_attr=ParamAttr(initializer=initializer),
  78. bias_attr=False)
  79. self.bn1 = nn.BatchNorm2D(out_channel)
  80. self.pwconv = nn.Conv2D(
  81. in_channels=out_channel,
  82. out_channels=out_channel,
  83. kernel_size=1,
  84. groups=1,
  85. padding=0,
  86. weight_attr=ParamAttr(initializer=initializer),
  87. bias_attr=False)
  88. self.bn2 = nn.BatchNorm2D(out_channel)
  89. def act_func(self, x):
  90. if self.act == "leaky_relu":
  91. x = F.leaky_relu(x)
  92. elif self.act == "hard_swish":
  93. x = F.hardswish(x)
  94. return x
  95. def forward(self, x):
  96. x = self.act_func(self.bn1(self.dwconv(x)))
  97. x = self.act_func(self.bn2(self.pwconv(x)))
  98. return x
  99. class DarknetBottleneck(nn.Layer):
  100. """The basic bottleneck block used in Darknet.
  101. Each Block consists of two ConvModules and the input is added to the
  102. final output. Each ConvModule is composed of Conv, BN, and act.
  103. The first convLayer has filter size of 1x1 and the second one has the
  104. filter size of 3x3.
  105. Args:
  106. in_channels (int): The input channels of this Module.
  107. out_channels (int): The output channels of this Module.
  108. expansion (int): The kernel size of the convolution. Default: 0.5
  109. add_identity (bool): Whether to add identity to the out.
  110. Default: True
  111. use_depthwise (bool): Whether to use depthwise separable convolution.
  112. Default: False
  113. """
  114. def __init__(self,
  115. in_channels,
  116. out_channels,
  117. kernel_size=3,
  118. expansion=0.5,
  119. add_identity=True,
  120. use_depthwise=False,
  121. act="leaky_relu"):
  122. super(DarknetBottleneck, self).__init__()
  123. hidden_channels = int(out_channels * expansion)
  124. conv_func = DPModule if use_depthwise else ConvBNLayer
  125. self.conv1 = ConvBNLayer(
  126. in_channel=in_channels,
  127. out_channel=hidden_channels,
  128. kernel_size=1,
  129. act=act)
  130. self.conv2 = conv_func(
  131. in_channel=hidden_channels,
  132. out_channel=out_channels,
  133. kernel_size=kernel_size,
  134. stride=1,
  135. act=act)
  136. self.add_identity = \
  137. add_identity and in_channels == out_channels
  138. def forward(self, x):
  139. identity = x
  140. out = self.conv1(x)
  141. out = self.conv2(out)
  142. if self.add_identity:
  143. return out + identity
  144. else:
  145. return out
  146. class CSPLayer(nn.Layer):
  147. """Cross Stage Partial Layer.
  148. Args:
  149. in_channels (int): The input channels of the CSP layer.
  150. out_channels (int): The output channels of the CSP layer.
  151. expand_ratio (float): Ratio to adjust the number of channels of the
  152. hidden layer. Default: 0.5
  153. num_blocks (int): Number of blocks. Default: 1
  154. add_identity (bool): Whether to add identity in blocks.
  155. Default: True
  156. use_depthwise (bool): Whether to depthwise separable convolution in
  157. blocks. Default: False
  158. """
  159. def __init__(self,
  160. in_channels,
  161. out_channels,
  162. kernel_size=3,
  163. expand_ratio=0.5,
  164. num_blocks=1,
  165. add_identity=True,
  166. use_depthwise=False,
  167. act="leaky_relu"):
  168. super().__init__()
  169. mid_channels = int(out_channels * expand_ratio)
  170. self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
  171. self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
  172. self.final_conv = ConvBNLayer(
  173. 2 * mid_channels, out_channels, 1, act=act)
  174. self.blocks = nn.Sequential(* [
  175. DarknetBottleneck(
  176. mid_channels,
  177. mid_channels,
  178. kernel_size,
  179. 1.0,
  180. add_identity,
  181. use_depthwise,
  182. act=act) for _ in range(num_blocks)
  183. ])
  184. def forward(self, x):
  185. x_short = self.short_conv(x)
  186. x_main = self.main_conv(x)
  187. x_main = self.blocks(x_main)
  188. x_final = paddle.concat((x_main, x_short), axis=1)
  189. return self.final_conv(x_final)
  190. class Channel_T(nn.Layer):
  191. def __init__(self,
  192. in_channels=[116, 232, 464],
  193. out_channels=96,
  194. act="leaky_relu"):
  195. super(Channel_T, self).__init__()
  196. self.convs = nn.LayerList()
  197. for i in range(len(in_channels)):
  198. self.convs.append(
  199. ConvBNLayer(
  200. in_channels[i], out_channels, 1, act=act))
  201. def forward(self, x):
  202. outs = [self.convs[i](x[i]) for i in range(len(x))]
  203. return outs
  204. class CSPPAN(nn.Layer):
  205. """Path Aggregation Network with CSP module.
  206. Args:
  207. in_channels (List[int]): Number of input channels per scale.
  208. out_channels (int): Number of output channels (used at each scale)
  209. kernel_size (int): The conv2d kernel size of this Module.
  210. num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
  211. use_depthwise (bool): Whether to depthwise separable convolution in
  212. blocks. Default: True
  213. """
  214. def __init__(self,
  215. in_channels,
  216. out_channels,
  217. kernel_size=5,
  218. num_csp_blocks=1,
  219. use_depthwise=True,
  220. act='hard_swish'):
  221. super(CSPPAN, self).__init__()
  222. self.in_channels = in_channels
  223. self.out_channels = [out_channels] * len(in_channels)
  224. conv_func = DPModule if use_depthwise else ConvBNLayer
  225. self.conv_t = Channel_T(in_channels, out_channels, act=act)
  226. # build top-down blocks
  227. self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
  228. self.top_down_blocks = nn.LayerList()
  229. for idx in range(len(in_channels) - 1, 0, -1):
  230. self.top_down_blocks.append(
  231. CSPLayer(
  232. out_channels * 2,
  233. out_channels,
  234. kernel_size=kernel_size,
  235. num_blocks=num_csp_blocks,
  236. add_identity=False,
  237. use_depthwise=use_depthwise,
  238. act=act))
  239. # build bottom-up blocks
  240. self.downsamples = nn.LayerList()
  241. self.bottom_up_blocks = nn.LayerList()
  242. for idx in range(len(in_channels) - 1):
  243. self.downsamples.append(
  244. conv_func(
  245. out_channels,
  246. out_channels,
  247. kernel_size=kernel_size,
  248. stride=2,
  249. act=act))
  250. self.bottom_up_blocks.append(
  251. CSPLayer(
  252. out_channels * 2,
  253. out_channels,
  254. kernel_size=kernel_size,
  255. num_blocks=num_csp_blocks,
  256. add_identity=False,
  257. use_depthwise=use_depthwise,
  258. act=act))
  259. def forward(self, inputs):
  260. """
  261. Args:
  262. inputs (tuple[Tensor]): input features.
  263. Returns:
  264. tuple[Tensor]: CSPPAN features.
  265. """
  266. assert len(inputs) == len(self.in_channels)
  267. inputs = self.conv_t(inputs)
  268. # top-down path
  269. inner_outs = [inputs[-1]]
  270. for idx in range(len(self.in_channels) - 1, 0, -1):
  271. feat_heigh = inner_outs[0]
  272. feat_low = inputs[idx - 1]
  273. upsample_feat = F.upsample(
  274. feat_heigh, size=paddle.shape(feat_low)[2:4], mode="nearest")
  275. inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
  276. paddle.concat([upsample_feat, feat_low], 1))
  277. inner_outs.insert(0, inner_out)
  278. # bottom-up path
  279. outs = [inner_outs[0]]
  280. for idx in range(len(self.in_channels) - 1):
  281. feat_low = outs[-1]
  282. feat_height = inner_outs[idx + 1]
  283. downsample_feat = self.downsamples[idx](feat_low)
  284. out = self.bottom_up_blocks[idx](paddle.concat(
  285. [downsample_feat, feat_height], 1))
  286. outs.append(out)
  287. return tuple(outs)