fce_postprocess.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py
  17. """
  18. import cv2
  19. import paddle
  20. import numpy as np
  21. from numpy.fft import ifft
  22. from ppocr.utils.poly_nms import poly_nms, valid_boundary
  23. def fill_hole(input_mask):
  24. h, w = input_mask.shape
  25. canvas = np.zeros((h + 2, w + 2), np.uint8)
  26. canvas[1:h + 1, 1:w + 1] = input_mask.copy()
  27. mask = np.zeros((h + 4, w + 4), np.uint8)
  28. cv2.floodFill(canvas, mask, (0, 0), 1)
  29. canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)
  30. return ~canvas | input_mask
  31. def fourier2poly(fourier_coeff, num_reconstr_points=50):
  32. """ Inverse Fourier transform
  33. Args:
  34. fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1),
  35. with n and k being candidates number and Fourier degree
  36. respectively.
  37. num_reconstr_points (int): Number of reconstructed polygon points.
  38. Returns:
  39. Polygons (ndarray): The reconstructed polygons shaped (n, n')
  40. """
  41. a = np.zeros((len(fourier_coeff), num_reconstr_points), dtype='complex')
  42. k = (len(fourier_coeff[0]) - 1) // 2
  43. a[:, 0:k + 1] = fourier_coeff[:, k:]
  44. a[:, -k:] = fourier_coeff[:, :k]
  45. poly_complex = ifft(a) * num_reconstr_points
  46. polygon = np.zeros((len(fourier_coeff), num_reconstr_points, 2))
  47. polygon[:, :, 0] = poly_complex.real
  48. polygon[:, :, 1] = poly_complex.imag
  49. return polygon.astype('int32').reshape((len(fourier_coeff), -1))
  50. class FCEPostProcess(object):
  51. """
  52. The post process for FCENet.
  53. """
  54. def __init__(self,
  55. scales,
  56. fourier_degree=5,
  57. num_reconstr_points=50,
  58. decoding_type='fcenet',
  59. score_thr=0.3,
  60. nms_thr=0.1,
  61. alpha=1.0,
  62. beta=1.0,
  63. box_type='poly',
  64. **kwargs):
  65. self.scales = scales
  66. self.fourier_degree = fourier_degree
  67. self.num_reconstr_points = num_reconstr_points
  68. self.decoding_type = decoding_type
  69. self.score_thr = score_thr
  70. self.nms_thr = nms_thr
  71. self.alpha = alpha
  72. self.beta = beta
  73. self.box_type = box_type
  74. def __call__(self, preds, shape_list):
  75. score_maps = []
  76. for key, value in preds.items():
  77. if isinstance(value, paddle.Tensor):
  78. value = value.numpy()
  79. cls_res = value[:, :4, :, :]
  80. reg_res = value[:, 4:, :, :]
  81. score_maps.append([cls_res, reg_res])
  82. return self.get_boundary(score_maps, shape_list)
  83. def resize_boundary(self, boundaries, scale_factor):
  84. """Rescale boundaries via scale_factor.
  85. Args:
  86. boundaries (list[list[float]]): The boundary list. Each boundary
  87. with size 2k+1 with k>=4.
  88. scale_factor(ndarray): The scale factor of size (4,).
  89. Returns:
  90. boundaries (list[list[float]]): The scaled boundaries.
  91. """
  92. boxes = []
  93. scores = []
  94. for b in boundaries:
  95. sz = len(b)
  96. valid_boundary(b, True)
  97. scores.append(b[-1])
  98. b = (np.array(b[:sz - 1]) *
  99. (np.tile(scale_factor[:2], int(
  100. (sz - 1) / 2)).reshape(1, sz - 1))).flatten().tolist()
  101. boxes.append(np.array(b).reshape([-1, 2]))
  102. return np.array(boxes, dtype=np.float32), scores
  103. def get_boundary(self, score_maps, shape_list):
  104. assert len(score_maps) == len(self.scales)
  105. boundaries = []
  106. for idx, score_map in enumerate(score_maps):
  107. scale = self.scales[idx]
  108. boundaries = boundaries + self._get_boundary_single(score_map,
  109. scale)
  110. # nms
  111. boundaries = poly_nms(boundaries, self.nms_thr)
  112. boundaries, scores = self.resize_boundary(
  113. boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])
  114. boxes_batch = [dict(points=boundaries, scores=scores)]
  115. return boxes_batch
  116. def _get_boundary_single(self, score_map, scale):
  117. assert len(score_map) == 2
  118. assert score_map[1].shape[1] == 4 * self.fourier_degree + 2
  119. return self.fcenet_decode(
  120. preds=score_map,
  121. fourier_degree=self.fourier_degree,
  122. num_reconstr_points=self.num_reconstr_points,
  123. scale=scale,
  124. alpha=self.alpha,
  125. beta=self.beta,
  126. box_type=self.box_type,
  127. score_thr=self.score_thr,
  128. nms_thr=self.nms_thr)
  129. def fcenet_decode(self,
  130. preds,
  131. fourier_degree,
  132. num_reconstr_points,
  133. scale,
  134. alpha=1.0,
  135. beta=2.0,
  136. box_type='poly',
  137. score_thr=0.3,
  138. nms_thr=0.1):
  139. """Decoding predictions of FCENet to instances.
  140. Args:
  141. preds (list(Tensor)): The head output tensors.
  142. fourier_degree (int): The maximum Fourier transform degree k.
  143. num_reconstr_points (int): The points number of the polygon
  144. reconstructed from predicted Fourier coefficients.
  145. scale (int): The down-sample scale of the prediction.
  146. alpha (float) : The parameter to calculate final scores. Score_{final}
  147. = (Score_{text region} ^ alpha)
  148. * (Score_{text center region}^ beta)
  149. beta (float) : The parameter to calculate final score.
  150. box_type (str): Boundary encoding type 'poly' or 'quad'.
  151. score_thr (float) : The threshold used to filter out the final
  152. candidates.
  153. nms_thr (float) : The threshold of nms.
  154. Returns:
  155. boundaries (list[list[float]]): The instance boundary and confidence
  156. list.
  157. """
  158. assert isinstance(preds, list)
  159. assert len(preds) == 2
  160. assert box_type in ['poly', 'quad']
  161. cls_pred = preds[0][0]
  162. tr_pred = cls_pred[0:2]
  163. tcl_pred = cls_pred[2:]
  164. reg_pred = preds[1][0].transpose([1, 2, 0])
  165. x_pred = reg_pred[:, :, :2 * fourier_degree + 1]
  166. y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]
  167. score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)
  168. tr_pred_mask = (score_pred) > score_thr
  169. tr_mask = fill_hole(tr_pred_mask)
  170. tr_contours, _ = cv2.findContours(
  171. tr_mask.astype(np.uint8), cv2.RETR_TREE,
  172. cv2.CHAIN_APPROX_SIMPLE) # opencv4
  173. mask = np.zeros_like(tr_mask)
  174. boundaries = []
  175. for cont in tr_contours:
  176. deal_map = mask.copy().astype(np.int8)
  177. cv2.drawContours(deal_map, [cont], -1, 1, -1)
  178. score_map = score_pred * deal_map
  179. score_mask = score_map > 0
  180. xy_text = np.argwhere(score_mask)
  181. dxy = xy_text[:, 1] + xy_text[:, 0] * 1j
  182. x, y = x_pred[score_mask], y_pred[score_mask]
  183. c = x + y * 1j
  184. c[:, fourier_degree] = c[:, fourier_degree] + dxy
  185. c *= scale
  186. polygons = fourier2poly(c, num_reconstr_points)
  187. score = score_map[score_mask].reshape(-1, 1)
  188. polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)
  189. boundaries = boundaries + polygons
  190. boundaries = poly_nms(boundaries, nms_thr)
  191. if box_type == 'quad':
  192. new_boundaries = []
  193. for boundary in boundaries:
  194. poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
  195. score = boundary[-1]
  196. points = cv2.boxPoints(cv2.minAreaRect(poly))
  197. points = np.int0(points)
  198. new_boundaries.append(points.reshape(-1).tolist() + [score])
  199. boundaries = new_boundaries
  200. return boundaries