fce_aug.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/transforms.py
  17. """
  18. import numpy as np
  19. from PIL import Image, ImageDraw
  20. import cv2
  21. from shapely.geometry import Polygon
  22. import math
  23. from ppocr.utils.poly_nms import poly_intersection
  24. class RandomScaling:
  25. def __init__(self, size=800, scale=(3. / 4, 5. / 2), **kwargs):
  26. """Random scale the image while keeping aspect.
  27. Args:
  28. size (int) : Base size before scaling.
  29. scale (tuple(float)) : The range of scaling.
  30. """
  31. assert isinstance(size, int)
  32. assert isinstance(scale, float) or isinstance(scale, tuple)
  33. self.size = size
  34. self.scale = scale if isinstance(scale, tuple) \
  35. else (1 - scale, 1 + scale)
  36. def __call__(self, data):
  37. image = data['image']
  38. text_polys = data['polys']
  39. h, w, _ = image.shape
  40. aspect_ratio = np.random.uniform(min(self.scale), max(self.scale))
  41. scales = self.size * 1.0 / max(h, w) * aspect_ratio
  42. scales = np.array([scales, scales])
  43. out_size = (int(h * scales[1]), int(w * scales[0]))
  44. image = cv2.resize(image, out_size[::-1])
  45. data['image'] = image
  46. text_polys[:, :, 0::2] = text_polys[:, :, 0::2] * scales[1]
  47. text_polys[:, :, 1::2] = text_polys[:, :, 1::2] * scales[0]
  48. data['polys'] = text_polys
  49. return data
  50. class RandomCropFlip:
  51. def __init__(self,
  52. pad_ratio=0.1,
  53. crop_ratio=0.5,
  54. iter_num=1,
  55. min_area_ratio=0.2,
  56. **kwargs):
  57. """Random crop and flip a patch of the image.
  58. Args:
  59. crop_ratio (float): The ratio of cropping.
  60. iter_num (int): Number of operations.
  61. min_area_ratio (float): Minimal area ratio between cropped patch
  62. and original image.
  63. """
  64. assert isinstance(crop_ratio, float)
  65. assert isinstance(iter_num, int)
  66. assert isinstance(min_area_ratio, float)
  67. self.pad_ratio = pad_ratio
  68. self.epsilon = 1e-2
  69. self.crop_ratio = crop_ratio
  70. self.iter_num = iter_num
  71. self.min_area_ratio = min_area_ratio
  72. def __call__(self, results):
  73. for i in range(self.iter_num):
  74. results = self.random_crop_flip(results)
  75. return results
  76. def random_crop_flip(self, results):
  77. image = results['image']
  78. polygons = results['polys']
  79. ignore_tags = results['ignore_tags']
  80. if len(polygons) == 0:
  81. return results
  82. if np.random.random() >= self.crop_ratio:
  83. return results
  84. h, w, _ = image.shape
  85. area = h * w
  86. pad_h = int(h * self.pad_ratio)
  87. pad_w = int(w * self.pad_ratio)
  88. h_axis, w_axis = self.generate_crop_target(image, polygons, pad_h,
  89. pad_w)
  90. if len(h_axis) == 0 or len(w_axis) == 0:
  91. return results
  92. attempt = 0
  93. while attempt < 50:
  94. attempt += 1
  95. polys_keep = []
  96. polys_new = []
  97. ignore_tags_keep = []
  98. ignore_tags_new = []
  99. xx = np.random.choice(w_axis, size=2)
  100. xmin = np.min(xx) - pad_w
  101. xmax = np.max(xx) - pad_w
  102. xmin = np.clip(xmin, 0, w - 1)
  103. xmax = np.clip(xmax, 0, w - 1)
  104. yy = np.random.choice(h_axis, size=2)
  105. ymin = np.min(yy) - pad_h
  106. ymax = np.max(yy) - pad_h
  107. ymin = np.clip(ymin, 0, h - 1)
  108. ymax = np.clip(ymax, 0, h - 1)
  109. if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio:
  110. # area too small
  111. continue
  112. pts = np.stack([[xmin, xmax, xmax, xmin],
  113. [ymin, ymin, ymax, ymax]]).T.astype(np.int32)
  114. pp = Polygon(pts)
  115. fail_flag = False
  116. for polygon, ignore_tag in zip(polygons, ignore_tags):
  117. ppi = Polygon(polygon.reshape(-1, 2))
  118. ppiou, _ = poly_intersection(ppi, pp, buffer=0)
  119. if np.abs(ppiou - float(ppi.area)) > self.epsilon and \
  120. np.abs(ppiou) > self.epsilon:
  121. fail_flag = True
  122. break
  123. elif np.abs(ppiou - float(ppi.area)) < self.epsilon:
  124. polys_new.append(polygon)
  125. ignore_tags_new.append(ignore_tag)
  126. else:
  127. polys_keep.append(polygon)
  128. ignore_tags_keep.append(ignore_tag)
  129. if fail_flag:
  130. continue
  131. else:
  132. break
  133. cropped = image[ymin:ymax, xmin:xmax, :]
  134. select_type = np.random.randint(3)
  135. if select_type == 0:
  136. img = np.ascontiguousarray(cropped[:, ::-1])
  137. elif select_type == 1:
  138. img = np.ascontiguousarray(cropped[::-1, :])
  139. else:
  140. img = np.ascontiguousarray(cropped[::-1, ::-1])
  141. image[ymin:ymax, xmin:xmax, :] = img
  142. results['img'] = image
  143. if len(polys_new) != 0:
  144. height, width, _ = cropped.shape
  145. if select_type == 0:
  146. for idx, polygon in enumerate(polys_new):
  147. poly = polygon.reshape(-1, 2)
  148. poly[:, 0] = width - poly[:, 0] + 2 * xmin
  149. polys_new[idx] = poly
  150. elif select_type == 1:
  151. for idx, polygon in enumerate(polys_new):
  152. poly = polygon.reshape(-1, 2)
  153. poly[:, 1] = height - poly[:, 1] + 2 * ymin
  154. polys_new[idx] = poly
  155. else:
  156. for idx, polygon in enumerate(polys_new):
  157. poly = polygon.reshape(-1, 2)
  158. poly[:, 0] = width - poly[:, 0] + 2 * xmin
  159. poly[:, 1] = height - poly[:, 1] + 2 * ymin
  160. polys_new[idx] = poly
  161. polygons = polys_keep + polys_new
  162. ignore_tags = ignore_tags_keep + ignore_tags_new
  163. results['polys'] = np.array(polygons)
  164. results['ignore_tags'] = ignore_tags
  165. return results
  166. def generate_crop_target(self, image, all_polys, pad_h, pad_w):
  167. """Generate crop target and make sure not to crop the polygon
  168. instances.
  169. Args:
  170. image (ndarray): The image waited to be crop.
  171. all_polys (list[list[ndarray]]): All polygons including ground
  172. truth polygons and ground truth ignored polygons.
  173. pad_h (int): Padding length of height.
  174. pad_w (int): Padding length of width.
  175. Returns:
  176. h_axis (ndarray): Vertical cropping range.
  177. w_axis (ndarray): Horizontal cropping range.
  178. """
  179. h, w, _ = image.shape
  180. h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
  181. w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
  182. text_polys = []
  183. for polygon in all_polys:
  184. rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2))
  185. box = cv2.boxPoints(rect)
  186. box = np.int0(box)
  187. text_polys.append([box[0], box[1], box[2], box[3]])
  188. polys = np.array(text_polys, dtype=np.int32)
  189. for poly in polys:
  190. poly = np.round(poly, decimals=0).astype(np.int32)
  191. minx = np.min(poly[:, 0])
  192. maxx = np.max(poly[:, 0])
  193. w_array[minx + pad_w:maxx + pad_w] = 1
  194. miny = np.min(poly[:, 1])
  195. maxy = np.max(poly[:, 1])
  196. h_array[miny + pad_h:maxy + pad_h] = 1
  197. h_axis = np.where(h_array == 0)[0]
  198. w_axis = np.where(w_array == 0)[0]
  199. return h_axis, w_axis
  200. class RandomCropPolyInstances:
  201. """Randomly crop images and make sure to contain at least one intact
  202. instance."""
  203. def __init__(self, crop_ratio=5.0 / 8.0, min_side_ratio=0.4, **kwargs):
  204. super().__init__()
  205. self.crop_ratio = crop_ratio
  206. self.min_side_ratio = min_side_ratio
  207. def sample_valid_start_end(self, valid_array, min_len, max_start, min_end):
  208. assert isinstance(min_len, int)
  209. assert len(valid_array) > min_len
  210. start_array = valid_array.copy()
  211. max_start = min(len(start_array) - min_len, max_start)
  212. start_array[max_start:] = 0
  213. start_array[0] = 1
  214. diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0])
  215. region_starts = np.where(diff_array < 0)[0]
  216. region_ends = np.where(diff_array > 0)[0]
  217. region_ind = np.random.randint(0, len(region_starts))
  218. start = np.random.randint(region_starts[region_ind],
  219. region_ends[region_ind])
  220. end_array = valid_array.copy()
  221. min_end = max(start + min_len, min_end)
  222. end_array[:min_end] = 0
  223. end_array[-1] = 1
  224. diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0])
  225. region_starts = np.where(diff_array < 0)[0]
  226. region_ends = np.where(diff_array > 0)[0]
  227. region_ind = np.random.randint(0, len(region_starts))
  228. end = np.random.randint(region_starts[region_ind],
  229. region_ends[region_ind])
  230. return start, end
  231. def sample_crop_box(self, img_size, results):
  232. """Generate crop box and make sure not to crop the polygon instances.
  233. Args:
  234. img_size (tuple(int)): The image size (h, w).
  235. results (dict): The results dict.
  236. """
  237. assert isinstance(img_size, tuple)
  238. h, w = img_size[:2]
  239. key_masks = results['polys']
  240. x_valid_array = np.ones(w, dtype=np.int32)
  241. y_valid_array = np.ones(h, dtype=np.int32)
  242. selected_mask = key_masks[np.random.randint(0, len(key_masks))]
  243. selected_mask = selected_mask.reshape((-1, 2)).astype(np.int32)
  244. max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0)
  245. min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1)
  246. max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0)
  247. min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1)
  248. for mask in key_masks:
  249. mask = mask.reshape((-1, 2)).astype(np.int32)
  250. clip_x = np.clip(mask[:, 0], 0, w - 1)
  251. clip_y = np.clip(mask[:, 1], 0, h - 1)
  252. min_x, max_x = np.min(clip_x), np.max(clip_x)
  253. min_y, max_y = np.min(clip_y), np.max(clip_y)
  254. x_valid_array[min_x - 2:max_x + 3] = 0
  255. y_valid_array[min_y - 2:max_y + 3] = 0
  256. min_w = int(w * self.min_side_ratio)
  257. min_h = int(h * self.min_side_ratio)
  258. x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start,
  259. min_x_end)
  260. y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start,
  261. min_y_end)
  262. return np.array([x1, y1, x2, y2])
  263. def crop_img(self, img, bbox):
  264. assert img.ndim == 3
  265. h, w, _ = img.shape
  266. assert 0 <= bbox[1] < bbox[3] <= h
  267. assert 0 <= bbox[0] < bbox[2] <= w
  268. return img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
  269. def __call__(self, results):
  270. image = results['image']
  271. polygons = results['polys']
  272. ignore_tags = results['ignore_tags']
  273. if len(polygons) < 1:
  274. return results
  275. if np.random.random_sample() < self.crop_ratio:
  276. crop_box = self.sample_crop_box(image.shape, results)
  277. img = self.crop_img(image, crop_box)
  278. results['image'] = img
  279. # crop and filter masks
  280. x1, y1, x2, y2 = crop_box
  281. w = max(x2 - x1, 1)
  282. h = max(y2 - y1, 1)
  283. polygons[:, :, 0::2] = polygons[:, :, 0::2] - x1
  284. polygons[:, :, 1::2] = polygons[:, :, 1::2] - y1
  285. valid_masks_list = []
  286. valid_tags_list = []
  287. for ind, polygon in enumerate(polygons):
  288. if (polygon[:, ::2] > -4).all() and (
  289. polygon[:, ::2] < w + 4).all() and (
  290. polygon[:, 1::2] > -4).all() and (
  291. polygon[:, 1::2] < h + 4).all():
  292. polygon[:, ::2] = np.clip(polygon[:, ::2], 0, w)
  293. polygon[:, 1::2] = np.clip(polygon[:, 1::2], 0, h)
  294. valid_masks_list.append(polygon)
  295. valid_tags_list.append(ignore_tags[ind])
  296. results['polys'] = np.array(valid_masks_list)
  297. results['ignore_tags'] = valid_tags_list
  298. return results
  299. def __repr__(self):
  300. repr_str = self.__class__.__name__
  301. return repr_str
  302. class RandomRotatePolyInstances:
  303. def __init__(self,
  304. rotate_ratio=0.5,
  305. max_angle=10,
  306. pad_with_fixed_color=False,
  307. pad_value=(0, 0, 0),
  308. **kwargs):
  309. """Randomly rotate images and polygon masks.
  310. Args:
  311. rotate_ratio (float): The ratio of samples to operate rotation.
  312. max_angle (int): The maximum rotation angle.
  313. pad_with_fixed_color (bool): The flag for whether to pad rotated
  314. image with fixed value. If set to False, the rotated image will
  315. be padded onto cropped image.
  316. pad_value (tuple(int)): The color value for padding rotated image.
  317. """
  318. self.rotate_ratio = rotate_ratio
  319. self.max_angle = max_angle
  320. self.pad_with_fixed_color = pad_with_fixed_color
  321. self.pad_value = pad_value
  322. def rotate(self, center, points, theta, center_shift=(0, 0)):
  323. # rotate points.
  324. (center_x, center_y) = center
  325. center_y = -center_y
  326. x, y = points[:, ::2], points[:, 1::2]
  327. y = -y
  328. theta = theta / 180 * math.pi
  329. cos = math.cos(theta)
  330. sin = math.sin(theta)
  331. x = (x - center_x)
  332. y = (y - center_y)
  333. _x = center_x + x * cos - y * sin + center_shift[0]
  334. _y = -(center_y + x * sin + y * cos) + center_shift[1]
  335. points[:, ::2], points[:, 1::2] = _x, _y
  336. return points
  337. def cal_canvas_size(self, ori_size, degree):
  338. assert isinstance(ori_size, tuple)
  339. angle = degree * math.pi / 180.0
  340. h, w = ori_size[:2]
  341. cos = math.cos(angle)
  342. sin = math.sin(angle)
  343. canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos))
  344. canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin))
  345. canvas_size = (canvas_h, canvas_w)
  346. return canvas_size
  347. def sample_angle(self, max_angle):
  348. angle = np.random.random_sample() * 2 * max_angle - max_angle
  349. return angle
  350. def rotate_img(self, img, angle, canvas_size):
  351. h, w = img.shape[:2]
  352. rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
  353. rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2)
  354. rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2)
  355. if self.pad_with_fixed_color:
  356. target_img = cv2.warpAffine(
  357. img,
  358. rotation_matrix, (canvas_size[1], canvas_size[0]),
  359. flags=cv2.INTER_NEAREST,
  360. borderValue=self.pad_value)
  361. else:
  362. mask = np.zeros_like(img)
  363. (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
  364. np.random.randint(0, w * 7 // 8))
  365. img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
  366. img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0]))
  367. mask = cv2.warpAffine(
  368. mask,
  369. rotation_matrix, (canvas_size[1], canvas_size[0]),
  370. borderValue=[1, 1, 1])
  371. target_img = cv2.warpAffine(
  372. img,
  373. rotation_matrix, (canvas_size[1], canvas_size[0]),
  374. borderValue=[0, 0, 0])
  375. target_img = target_img + img_cut * mask
  376. return target_img
  377. def __call__(self, results):
  378. if np.random.random_sample() < self.rotate_ratio:
  379. image = results['image']
  380. polygons = results['polys']
  381. h, w = image.shape[:2]
  382. angle = self.sample_angle(self.max_angle)
  383. canvas_size = self.cal_canvas_size((h, w), angle)
  384. center_shift = (int((canvas_size[1] - w) / 2), int(
  385. (canvas_size[0] - h) / 2))
  386. image = self.rotate_img(image, angle, canvas_size)
  387. results['image'] = image
  388. # rotate polygons
  389. rotated_masks = []
  390. for mask in polygons:
  391. rotated_mask = self.rotate((w / 2, h / 2), mask, angle,
  392. center_shift)
  393. rotated_masks.append(rotated_mask)
  394. results['polys'] = np.array(rotated_masks)
  395. return results
  396. def __repr__(self):
  397. repr_str = self.__class__.__name__
  398. return repr_str
  399. class SquareResizePad:
  400. def __init__(self,
  401. target_size,
  402. pad_ratio=0.6,
  403. pad_with_fixed_color=False,
  404. pad_value=(0, 0, 0),
  405. **kwargs):
  406. """Resize or pad images to be square shape.
  407. Args:
  408. target_size (int): The target size of square shaped image.
  409. pad_with_fixed_color (bool): The flag for whether to pad rotated
  410. image with fixed value. If set to False, the rescales image will
  411. be padded onto cropped image.
  412. pad_value (tuple(int)): The color value for padding rotated image.
  413. """
  414. assert isinstance(target_size, int)
  415. assert isinstance(pad_ratio, float)
  416. assert isinstance(pad_with_fixed_color, bool)
  417. assert isinstance(pad_value, tuple)
  418. self.target_size = target_size
  419. self.pad_ratio = pad_ratio
  420. self.pad_with_fixed_color = pad_with_fixed_color
  421. self.pad_value = pad_value
  422. def resize_img(self, img, keep_ratio=True):
  423. h, w, _ = img.shape
  424. if keep_ratio:
  425. t_h = self.target_size if h >= w else int(h * self.target_size / w)
  426. t_w = self.target_size if h <= w else int(w * self.target_size / h)
  427. else:
  428. t_h = t_w = self.target_size
  429. img = cv2.resize(img, (t_w, t_h))
  430. return img, (t_h, t_w)
  431. def square_pad(self, img):
  432. h, w = img.shape[:2]
  433. if h == w:
  434. return img, (0, 0)
  435. pad_size = max(h, w)
  436. if self.pad_with_fixed_color:
  437. expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8)
  438. expand_img[:] = self.pad_value
  439. else:
  440. (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
  441. np.random.randint(0, w * 7 // 8))
  442. img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
  443. expand_img = cv2.resize(img_cut, (pad_size, pad_size))
  444. if h > w:
  445. y0, x0 = 0, (h - w) // 2
  446. else:
  447. y0, x0 = (w - h) // 2, 0
  448. expand_img[y0:y0 + h, x0:x0 + w] = img
  449. offset = (x0, y0)
  450. return expand_img, offset
  451. def square_pad_mask(self, points, offset):
  452. x0, y0 = offset
  453. pad_points = points.copy()
  454. pad_points[::2] = pad_points[::2] + x0
  455. pad_points[1::2] = pad_points[1::2] + y0
  456. return pad_points
  457. def __call__(self, results):
  458. image = results['image']
  459. polygons = results['polys']
  460. h, w = image.shape[:2]
  461. if np.random.random_sample() < self.pad_ratio:
  462. image, out_size = self.resize_img(image, keep_ratio=True)
  463. image, offset = self.square_pad(image)
  464. else:
  465. image, out_size = self.resize_img(image, keep_ratio=False)
  466. offset = (0, 0)
  467. results['image'] = image
  468. try:
  469. polygons[:, :, 0::2] = polygons[:, :, 0::2] * out_size[
  470. 1] / w + offset[0]
  471. polygons[:, :, 1::2] = polygons[:, :, 1::2] * out_size[
  472. 0] / h + offset[1]
  473. except:
  474. pass
  475. results['polys'] = polygons
  476. return results
  477. def __repr__(self):
  478. repr_str = self.__class__.__name__
  479. return repr_str