fce_fpn.py
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
"""

import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay

__all__ = ['FCEFPN']


class ConvNormLayer(nn.Layer):
    """Conv2D followed by a normalization layer ('bn', 'sync_bn' or 'gn')."""

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 norm_groups=32,
                 lr_scale=1.,
                 freeze_norm=False,
                 initializer=Normal(
                     mean=0., std=0.01)):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        bias_attr = False

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                initializer=initializer, learning_rate=1.),
            bias_attr=bias_attr)

        norm_lr = 0. if freeze_norm else 1.
        param_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        bias_attr = ParamAttr(
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
        if norm_type == 'bn':
            self.norm = nn.BatchNorm2D(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'sync_bn':
            self.norm = nn.SyncBatchNorm(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        return out
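

# A minimal usage sketch for ConvNormLayer (assumes paddle is installed; the
# shapes are illustrative only):
#
#   import paddle
#   layer = ConvNormLayer(ch_in=64, ch_out=128, filter_size=3, stride=2,
#                         norm_type='gn', norm_groups=32)
#   y = layer(paddle.rand([1, 64, 56, 56]))  # -> shape [1, 128, 28, 28]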


class FCEFPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

    Args:
        in_channels (list[int]): input channels of each level which can be
            derived from the output shape of backbone by from_config
        out_channels (int): output channels of each level
        spatial_scales (list[float]): the spatial scales between input feature
            maps and original input image which can be derived from the output
            shape of backbone by from_config
        has_extra_convs (bool): whether to add extra conv to the last level.
            default False
        extra_stage (int): the number of extra stages added to the last level.
            default 1
        use_c5 (bool): whether to use c5 as the input of extra stage,
            otherwise p5 is used. default True
        norm_type (string|None): the normalization type in FPN module. If
            norm_type is None, norm will not be used after conv; if
            norm_type is a string, bn, gn and sync_bn are available.
            default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze normalization layer.
            default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default True
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(FCEFPN, self).__init__()
        self.out_channels = out_channels
        for _ in range(extra_stage):
            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channels * 3 * 3
        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        st_stage = 4 - len(in_channels)
        ed_stage = st_stage + len(in_channels) - 1
        for i in range(st_stage, ed_stage + 1):
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i - st_stage]
            if self.norm_type is not None:
                lateral = self.add_sublayer(
                    lateral_name,
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=out_channels,
                        filter_size=1,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=in_c)))
            else:
                lateral = self.add_sublayer(
                    lateral_name,
                    nn.Conv2D(
                        in_channels=in_c,
                        out_channels=out_channels,
                        kernel_size=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=in_c))))
            self.lateral_convs.append(lateral)
        for i in range(st_stage, ed_stage + 1):
            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            if self.norm_type is not None:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    ConvNormLayer(
                        ch_in=out_channels,
                        ch_out=out_channels,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=fan)))
            else:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    nn.Conv2D(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=fan))))
            self.fpn_convs.append(fpn_conv)
        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                if i == 0 and self.use_c5:
                    in_c = in_channels[-1]
                else:
                    in_c = out_channels
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                if self.norm_type is not None:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=out_channels,
                            filter_size=3,
                            stride=2,
                            norm_type=self.norm_type,
                            norm_decay=self.norm_decay,
                            freeze_norm=self.freeze_norm,
                            initializer=XavierUniform(fan_out=fan)))
                else:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        nn.Conv2D(
                            in_channels=in_c,
                            out_channels=out_channels,
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=XavierUniform(fan_out=fan))))
                self.fpn_convs.append(extra_fpn_conv)
    @classmethod
    def from_config(cls, cfg, input_shape):
        return {
            'in_channels': [i.channels for i in input_shape],
            'spatial_scales': [1.0 / i.stride for i in input_shape],
        }
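
    # A minimal sketch of what from_config consumes: any objects exposing
    # `.channels` and `.stride` (e.g. PaddleDetection's ShapeSpec). The
    # stand-in namedtuple below is hypothetical, for illustration only:
    #
    #   from collections import namedtuple
    #   ShapeSpec = namedtuple('ShapeSpec', ['channels', 'stride'])
    #   FCEFPN.from_config(cfg=None, input_shape=[
    #       ShapeSpec(512, 8), ShapeSpec(1024, 16), ShapeSpec(2048, 32)])
    #   # -> {'in_channels': [512, 1024, 2048],
    #   #     'spatial_scales': [0.125, 0.0625, 0.03125]}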

    def forward(self, body_feats):
        laterals = []
        num_levels = len(body_feats)

        # 1x1 lateral convs reduce every backbone level to out_channels
        for i in range(num_levels):
            laterals.append(self.lateral_convs[i](body_feats[i]))

        # top-down pathway: upsample the coarser level and add it to the finer one
        for i in range(1, num_levels):
            lvl = num_levels - i
            upsample = F.interpolate(
                laterals[lvl],
                scale_factor=2.,
                mode='nearest')
            laterals[lvl - 1] += upsample

        # 3x3 convs smooth the merged maps
        fpn_output = []
        for lvl in range(num_levels):
            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))

        if self.extra_stage > 0:
            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
            if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN does not have extra convs'
                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
            else:
                if self.use_c5:
                    extra_source = body_feats[-1]
                else:
                    extra_source = fpn_output[-1]
                fpn_output.append(self.fpn_convs[num_levels](extra_source))

                for i in range(1, self.extra_stage):
                    if self.relu_before_extra_convs:
                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
                            fpn_output[-1])))
                    else:
                        fpn_output.append(self.fpn_convs[num_levels + i](
                            fpn_output[-1]))
        return fpn_output
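

# A minimal end-to-end sketch (assumes paddle is installed; the channel sizes
# follow a ResNet-50-style backbone and are illustrative only):
#
#   import paddle
#   fpn = FCEFPN(in_channels=[512, 1024, 2048], out_channels=256, extra_stage=0)
#   feats = [paddle.rand([1, 512, 80, 80]),
#            paddle.rand([1, 1024, 40, 40]),
#            paddle.rand([1, 2048, 20, 20])]
#   outs = fpn(feats)  # three pyramid levels, each with 256 channels
#   # [o.shape for o in outs]
#   # -> [[1, 256, 80, 80], [1, 256, 40, 40], [1, 256, 20, 20]]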