123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241 |
- # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- This code is adapted from:
- https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py
- """
- import cv2
- import paddle
- import numpy as np
- from numpy.fft import ifft
- from ppocr.utils.poly_nms import poly_nms, valid_boundary
def fill_hole(input_mask):
    """Fill the enclosed holes of a binary mask.

    The mask is padded with a one-pixel border and the background is
    flood-filled starting from the padded corner. Background pixels the
    fill cannot reach (they are not connected to the border) are treated
    as holes and switched on in the result.

    Args:
        input_mask (ndarray): 2-D mask. The result is combined with ``~``
            and ``|``, so a boolean dtype is assumed -- confirm at the
            call site.

    Returns:
        ndarray: ``input_mask`` with the enclosed background pixels set.
    """
    h, w = input_mask.shape
    # One-pixel zero border so the fill seeded at (0, 0) can flow around
    # foreground regions that touch the image edge.
    canvas = np.zeros((h + 2, w + 2), np.uint8)
    canvas[1:h + 1, 1:w + 1] = input_mask
    # cv2.floodFill expects an operation mask 2 pixels wider and taller
    # than the image being filled.
    mask = np.zeros((h + 4, w + 4), np.uint8)
    cv2.floodFill(canvas, mask, (0, 0), 1)
    # After the fill only enclosed holes are still 0. The builtin ``bool``
    # replaces the ``np.bool`` alias, which was removed in NumPy 1.24.
    filled = canvas[1:h + 1, 1:w + 1].astype(bool)
    return ~filled | input_mask
def fourier2poly(fourier_coeff, num_reconstr_points=50):
    """Reconstruct polygons from Fourier coefficients (inverse transform).

    Args:
        fourier_coeff (ndarray): Complex Fourier coefficients shaped
            (n, 2k+1), with n and k being the number of candidates and the
            Fourier degree respectively. Columns are ordered from
            frequency -k to +k, so column k is the zero-frequency term.
        num_reconstr_points (int): Number of reconstructed polygon points.
            Expected to be at least 2k+1 so that the positive- and
            negative-frequency terms do not overlap in the spectrum.

    Returns:
        ndarray: int32 array shaped (n, 2 * num_reconstr_points). Each row
        holds the interleaved coordinates ``x0, y0, x1, y1, ...`` of one
        polygon, truncated towards zero.
    """
    fourier_coeff = np.asarray(fourier_coeff)
    num_candidates = len(fourier_coeff)
    if num_candidates == 0:
        # No candidates: return an empty result with the regular row width
        # instead of failing on ``fourier_coeff[0]`` / ``reshape(0, -1)``.
        return np.zeros((0, 2 * num_reconstr_points), dtype='int32')

    k = (fourier_coeff.shape[1] - 1) // 2
    # Re-order the coefficients into FFT layout: frequencies 0..k at the
    # front, frequencies -k..-1 at the tail, zero padding in between.
    spectrum = np.zeros(
        (num_candidates, num_reconstr_points), dtype='complex')
    spectrum[:, :k + 1] = fourier_coeff[:, k:]
    if k > 0:
        # With k == 0 the slice ``-k:`` would select the whole row.
        spectrum[:, -k:] = fourier_coeff[:, :k]

    # ifft divides by the number of points; multiply back to undo that.
    poly_complex = ifft(spectrum) * num_reconstr_points
    polygon = np.stack((poly_complex.real, poly_complex.imag), axis=-1)
    return polygon.astype('int32').reshape((num_candidates, -1))
class FCEPostProcess(object):
    """
    The post process for FCENet.

    Decodes the per-scale head outputs into polygon (or quadrilateral)
    text boundaries, suppresses overlapping candidates with ``poly_nms``
    and rescales the result with the ratios carried in ``shape_list``.
    """

    def __init__(self,
                 scales,
                 fourier_degree=5,
                 num_reconstr_points=50,
                 decoding_type='fcenet',
                 score_thr=0.3,
                 nms_thr=0.1,
                 alpha=1.0,
                 beta=1.0,
                 box_type='poly',
                 **kwargs):
        """Store the decoding configuration.

        Args:
            scales (list[int]): Down-sample scale of every prediction
                level, in the same order as the entries of ``preds``
                passed to ``__call__``.
            fourier_degree (int): The maximum Fourier transform degree k.
            num_reconstr_points (int): Number of points of each
                reconstructed polygon.
            decoding_type (str): Stored only; not consulted by this class.
            score_thr (float): Threshold used to filter candidates.
            nms_thr (float): Threshold passed to ``poly_nms``.
            alpha (float): Exponent applied to the text-region score.
            beta (float): Exponent applied to the text-center score.
            box_type (str): Boundary encoding type, 'poly' or 'quad'.
            **kwargs: Accepted and ignored.
        """
        self.scales = scales
        self.fourier_degree = fourier_degree
        self.num_reconstr_points = num_reconstr_points
        self.decoding_type = decoding_type
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.alpha = alpha
        self.beta = beta
        self.box_type = box_type

    def __call__(self, preds, shape_list):
        """Split each level into cls/reg maps and decode the boundaries.

        Args:
            preds (dict): One 4-D head output per level; iteration order
                must match ``self.scales``. ``paddle.Tensor`` values are
                converted to numpy arrays.
            shape_list (ndarray): Per-image shape info; see
                ``get_boundary`` for the part that is read.

        Returns:
            list[dict]: See ``get_boundary``.
        """
        score_maps = []
        for value in preds.values():
            if isinstance(value, paddle.Tensor):
                value = value.numpy()
            # The first 4 channels are the classification maps, the
            # remaining channels are the regression maps.
            cls_res = value[:, :4, :, :]
            reg_res = value[:, 4:, :, :]
            score_maps.append([cls_res, reg_res])

        return self.get_boundary(score_maps, shape_list)

    def resize_boundary(self, boundaries, scale_factor):
        """Rescale boundaries via scale_factor.

        Args:
            boundaries (list[list[float]]): The boundary list. Each
                boundary is a flat ``[x0, y0, x1, y1, ..., score]`` list.
            scale_factor (sequence[float]): Only the first two entries are
                used; they multiply the x and y coordinates respectively.

        Returns:
            tuple: ``(boxes, scores)`` where ``boxes`` is a float32 array
            of the scaled boundaries, each reshaped to (num_points, 2),
            and ``scores`` is the list of the boundaries' last entries.
        """
        xy_scale = np.array(scale_factor[:2])
        boxes = []
        scores = []
        for b in boundaries:
            valid_boundary(b, True)
            scores.append(b[-1])
            # Drop the trailing score, view the rest as (x, y) pairs and
            # scale each column by its own factor.
            boxes.append(np.array(b[:-1]).reshape(-1, 2) * xy_scale)
        return np.array(boxes, dtype=np.float32), scores

    def get_boundary(self, score_maps, shape_list):
        """Decode all levels, merge them with NMS and rescale the result.

        Args:
            score_maps (list[list[ndarray]]): One ``[cls, reg]`` pair per
                level, aligned with ``self.scales``.
            shape_list (ndarray): Only row 0 is read, so a single image is
                assumed. Entries from index 2 on are inverted and reversed
                to form the (x, y) scale factors -- presumably they are
                the (ratio_h, ratio_w) resize ratios; confirm against the
                data pipeline.

        Returns:
            list[dict]: A single dict with keys ``points`` and ``scores``.
        """
        assert len(score_maps) == len(self.scales)
        boundaries = []
        for score_map, scale in zip(score_maps, self.scales):
            boundaries.extend(self._get_boundary_single(score_map, scale))

        # nms
        boundaries = poly_nms(boundaries, self.nms_thr)
        boundaries, scores = self.resize_boundary(
            boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])

        boxes_batch = [dict(points=boundaries, scores=scores)]
        return boxes_batch

    def _get_boundary_single(self, score_map, scale):
        """Decode one prediction level with the configured parameters.

        Args:
            score_map (list[ndarray]): ``[cls, reg]`` maps of the level;
                ``reg`` must have ``4 * fourier_degree + 2`` channels.
            scale (int): The down-sample scale of this level.

        Returns:
            list[list[float]]: See ``fcenet_decode``.
        """
        assert len(score_map) == 2
        assert score_map[1].shape[1] == 4 * self.fourier_degree + 2

        return self.fcenet_decode(
            preds=score_map,
            fourier_degree=self.fourier_degree,
            num_reconstr_points=self.num_reconstr_points,
            scale=scale,
            alpha=self.alpha,
            beta=self.beta,
            box_type=self.box_type,
            score_thr=self.score_thr,
            nms_thr=self.nms_thr)

    def fcenet_decode(self,
                      preds,
                      fourier_degree,
                      num_reconstr_points,
                      scale,
                      alpha=1.0,
                      beta=2.0,
                      box_type='poly',
                      score_thr=0.3,
                      nms_thr=0.1):
        """Decoding predictions of FCENet to instances.

        Args:
            preds (list[ndarray]): ``[cls, reg]`` head outputs of one
                level. Only batch index 0 of each is decoded.
            fourier_degree (int): The maximum Fourier transform degree k.
            num_reconstr_points (int): The points number of the polygon
                reconstructed from predicted Fourier coefficients.
            scale (int): The down-sample scale of the prediction.
            alpha (float): The parameter to calculate final scores.
                Score_{final} = (Score_{text region} ^ alpha)
                * (Score_{text center region} ^ beta)
            beta (float): The parameter to calculate final score. The
                default here (2.0) differs from the constructor default
                (1.0); ``_get_boundary_single`` always passes
                ``self.beta`` explicitly.
            box_type (str): Boundary encoding type 'poly' or 'quad'.
            score_thr (float): The threshold used to filter out the final
                candidates.
            nms_thr (float): The threshold of nms.

        Returns:
            boundaries (list[list[float]]): The instance boundary and
            confidence list; each entry is the flattened point
            coordinates followed by the score.
        """
        assert isinstance(preds, list)
        assert len(preds) == 2
        assert box_type in ['poly', 'quad']

        cls_pred = preds[0][0]
        # Channels 0-1 belong to the text region, channels 2+ to the text
        # center region; channel 1 of each pair is used as the score.
        tr_pred = cls_pred[0:2]
        tcl_pred = cls_pred[2:]

        # (C, H, W) -> (H, W, C); the first 2k+1 channels are the x
        # components, the rest the y components of the coefficients.
        reg_pred = preds[1][0].transpose([1, 2, 0])
        x_pred = reg_pred[:, :, :2 * fourier_degree + 1]
        y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]

        score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)
        tr_pred_mask = (score_pred) > score_thr
        tr_mask = fill_hole(tr_pred_mask)

        tr_contours, _ = cv2.findContours(
            tr_mask.astype(np.uint8), cv2.RETR_TREE,
            cv2.CHAIN_APPROX_SIMPLE)  # opencv4

        boundaries = []
        for cont in tr_contours:
            # Rasterize the filled contour to select this region's pixels.
            deal_map = np.zeros(tr_mask.shape, dtype=np.int8)
            cv2.drawContours(deal_map, [cont], -1, 1, -1)

            score_map = score_pred * deal_map
            score_mask = score_map > 0
            xy_text = np.argwhere(score_mask)
            # argwhere yields (row, col); encode each pixel as x + y*1j.
            dxy = xy_text[:, 1] + xy_text[:, 0] * 1j

            x, y = x_pred[score_mask], y_pred[score_mask]
            c = x + y * 1j
            # Shift the zero-frequency term by the pixel position so the
            # reconstructed polygon is placed at that pixel, then map the
            # coefficients back to input-image scale.
            c[:, fourier_degree] = c[:, fourier_degree] + dxy
            c *= scale

            polygons = fourier2poly(c, num_reconstr_points)
            score = score_map[score_mask].reshape(-1, 1)
            polygons = poly_nms(
                np.hstack((polygons, score)).tolist(), nms_thr)

            boundaries = boundaries + polygons

        boundaries = poly_nms(boundaries, nms_thr)

        if box_type == 'quad':
            # Replace every polygon by its minimum-area bounding rectangle.
            new_boundaries = []
            for boundary in boundaries:
                poly = np.array(boundary[:-1]).reshape(-1, 2).astype(
                    np.float32)
                score = boundary[-1]
                points = cv2.boxPoints(cv2.minAreaRect(poly))
                # ``np.int0`` was removed in NumPy 2.0; it was an alias of
                # ``np.intp``, so this cast is the same conversion.
                points = points.astype(np.intp)
                new_boundaries.append(points.reshape(-1).tolist() + [score])
            boundaries = new_boundaries

        return boundaries
|