# ct_process.py (11 KB)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random

import cv2
import numpy as np
import paddle
import paddle.vision.transforms as transforms
import Polygon as plg
import pyclipper
import scipy.io as scio
from PIL import Image
  24. class RandomScale():
  25. def __init__(self, short_size=640, **kwargs):
  26. self.short_size = short_size
  27. def scale_aligned(self, img, scale):
  28. oh, ow = img.shape[0:2]
  29. h = int(oh * scale + 0.5)
  30. w = int(ow * scale + 0.5)
  31. if h % 32 != 0:
  32. h = h + (32 - h % 32)
  33. if w % 32 != 0:
  34. w = w + (32 - w % 32)
  35. img = cv2.resize(img, dsize=(w, h))
  36. factor_h = h / oh
  37. factor_w = w / ow
  38. return img, factor_h, factor_w
  39. def __call__(self, data):
  40. img = data['image']
  41. h, w = img.shape[0:2]
  42. random_scale = np.array([0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3])
  43. scale = (np.random.choice(random_scale) * self.short_size) / min(h, w)
  44. img, factor_h, factor_w = self.scale_aligned(img, scale)
  45. data['scale_factor'] = (factor_w, factor_h)
  46. data['image'] = img
  47. return data
  48. class MakeShrink():
  49. def __init__(self, kernel_scale=0.7, **kwargs):
  50. self.kernel_scale = kernel_scale
  51. def dist(self, a, b):
  52. return np.linalg.norm((a - b), ord=2, axis=0)
  53. def perimeter(self, bbox):
  54. peri = 0.0
  55. for i in range(bbox.shape[0]):
  56. peri += self.dist(bbox[i], bbox[(i + 1) % bbox.shape[0]])
  57. return peri
  58. def shrink(self, bboxes, rate, max_shr=20):
  59. rate = rate * rate
  60. shrinked_bboxes = []
  61. for bbox in bboxes:
  62. area = plg.Polygon(bbox).area()
  63. peri = self.perimeter(bbox)
  64. try:
  65. pco = pyclipper.PyclipperOffset()
  66. pco.AddPath(bbox, pyclipper.JT_ROUND,
  67. pyclipper.ET_CLOSEDPOLYGON)
  68. offset = min(
  69. int(area * (1 - rate) / (peri + 0.001) + 0.5), max_shr)
  70. shrinked_bbox = pco.Execute(-offset)
  71. if len(shrinked_bbox) == 0:
  72. shrinked_bboxes.append(bbox)
  73. continue
  74. shrinked_bbox = np.array(shrinked_bbox[0])
  75. if shrinked_bbox.shape[0] <= 2:
  76. shrinked_bboxes.append(bbox)
  77. continue
  78. shrinked_bboxes.append(shrinked_bbox)
  79. except Exception as e:
  80. shrinked_bboxes.append(bbox)
  81. return shrinked_bboxes
  82. def __call__(self, data):
  83. img = data['image']
  84. bboxes = data['polys']
  85. words = data['texts']
  86. scale_factor = data['scale_factor']
  87. gt_instance = np.zeros(img.shape[0:2], dtype='uint8') # h,w
  88. training_mask = np.ones(img.shape[0:2], dtype='uint8')
  89. training_mask_distance = np.ones(img.shape[0:2], dtype='uint8')
  90. for i in range(len(bboxes)):
  91. bboxes[i] = np.reshape(bboxes[i] * (
  92. [scale_factor[0], scale_factor[1]] * (bboxes[i].shape[0] // 2)),
  93. (bboxes[i].shape[0] // 2, 2)).astype('int32')
  94. for i in range(len(bboxes)):
  95. #different value for different bbox
  96. cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)
  97. # set training mask to 0
  98. cv2.drawContours(training_mask, [bboxes[i]], -1, 0, -1)
  99. # for not accurate annotation, use training_mask_distance
  100. if words[i] == '###' or words[i] == '???':
  101. cv2.drawContours(training_mask_distance, [bboxes[i]], -1, 0, -1)
  102. # make shrink
  103. gt_kernel_instance = np.zeros(img.shape[0:2], dtype='uint8')
  104. kernel_bboxes = self.shrink(bboxes, self.kernel_scale)
  105. for i in range(len(bboxes)):
  106. cv2.drawContours(gt_kernel_instance, [kernel_bboxes[i]], -1, i + 1,
  107. -1)
  108. # for training mask, kernel and background= 1, box region=0
  109. if words[i] != '###' and words[i] != '???':
  110. cv2.drawContours(training_mask, [kernel_bboxes[i]], -1, 1, -1)
  111. gt_kernel = gt_kernel_instance.copy()
  112. # for gt_kernel, kernel = 1
  113. gt_kernel[gt_kernel > 0] = 1
  114. # shrink 2 times
  115. tmp1 = gt_kernel_instance.copy()
  116. erode_kernel = np.ones((3, 3), np.uint8)
  117. tmp1 = cv2.erode(tmp1, erode_kernel, iterations=1)
  118. tmp2 = tmp1.copy()
  119. tmp2 = cv2.erode(tmp2, erode_kernel, iterations=1)
  120. # compute text region
  121. gt_kernel_inner = tmp1 - tmp2
  122. # gt_instance: text instance, bg=0, diff word use diff value
  123. # training_mask: text instance mask, word=0,kernel and bg=1
  124. # gt_kernel_instance: text kernel instance, bg=0, diff word use diff value
  125. # gt_kernel: text_kernel, bg=0,diff word use same value
  126. # gt_kernel_inner: text kernel reference
  127. # training_mask_distance: word without anno = 0, else 1
  128. data['image'] = [
  129. img, gt_instance, training_mask, gt_kernel_instance, gt_kernel,
  130. gt_kernel_inner, training_mask_distance
  131. ]
  132. return data
  133. class GroupRandomHorizontalFlip():
  134. def __init__(self, p=0.5, **kwargs):
  135. self.p = p
  136. def __call__(self, data):
  137. imgs = data['image']
  138. if random.random() < self.p:
  139. for i in range(len(imgs)):
  140. imgs[i] = np.flip(imgs[i], axis=1).copy()
  141. data['image'] = imgs
  142. return data
  143. class GroupRandomRotate():
  144. def __init__(self, **kwargs):
  145. pass
  146. def __call__(self, data):
  147. imgs = data['image']
  148. max_angle = 10
  149. angle = random.random() * 2 * max_angle - max_angle
  150. for i in range(len(imgs)):
  151. img = imgs[i]
  152. w, h = img.shape[:2]
  153. rotation_matrix = cv2.getRotationMatrix2D((h / 2, w / 2), angle, 1)
  154. img_rotation = cv2.warpAffine(
  155. img, rotation_matrix, (h, w), flags=cv2.INTER_NEAREST)
  156. imgs[i] = img_rotation
  157. data['image'] = imgs
  158. return data
  159. class GroupRandomCropPadding():
  160. def __init__(self, target_size=(640, 640), **kwargs):
  161. self.target_size = target_size
  162. def __call__(self, data):
  163. imgs = data['image']
  164. h, w = imgs[0].shape[0:2]
  165. t_w, t_h = self.target_size
  166. p_w, p_h = self.target_size
  167. if w == t_w and h == t_h:
  168. return data
  169. t_h = t_h if t_h < h else h
  170. t_w = t_w if t_w < w else w
  171. if random.random() > 3.0 / 8.0 and np.max(imgs[1]) > 0:
  172. # make sure to crop the text region
  173. tl = np.min(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
  174. tl[tl < 0] = 0
  175. br = np.max(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
  176. br[br < 0] = 0
  177. br[0] = min(br[0], h - t_h)
  178. br[1] = min(br[1], w - t_w)
  179. i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
  180. j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
  181. else:
  182. i = random.randint(0, h - t_h) if h - t_h > 0 else 0
  183. j = random.randint(0, w - t_w) if w - t_w > 0 else 0
  184. n_imgs = []
  185. for idx in range(len(imgs)):
  186. if len(imgs[idx].shape) == 3:
  187. s3_length = int(imgs[idx].shape[-1])
  188. img = imgs[idx][i:i + t_h, j:j + t_w, :]
  189. img_p = cv2.copyMakeBorder(
  190. img,
  191. 0,
  192. p_h - t_h,
  193. 0,
  194. p_w - t_w,
  195. borderType=cv2.BORDER_CONSTANT,
  196. value=tuple(0 for i in range(s3_length)))
  197. else:
  198. img = imgs[idx][i:i + t_h, j:j + t_w]
  199. img_p = cv2.copyMakeBorder(
  200. img,
  201. 0,
  202. p_h - t_h,
  203. 0,
  204. p_w - t_w,
  205. borderType=cv2.BORDER_CONSTANT,
  206. value=(0, ))
  207. n_imgs.append(img_p)
  208. data['image'] = n_imgs
  209. return data
  210. class MakeCentripetalShift():
  211. def __init__(self, **kwargs):
  212. pass
  213. def jaccard(self, As, Bs):
  214. A = As.shape[0] # small
  215. B = Bs.shape[0] # large
  216. dis = np.sqrt(
  217. np.sum((As[:, np.newaxis, :].repeat(
  218. B, axis=1) - Bs[np.newaxis, :, :].repeat(
  219. A, axis=0))**2,
  220. axis=-1))
  221. ind = np.argmin(dis, axis=-1)
  222. return ind
  223. def __call__(self, data):
  224. imgs = data['image']
  225. img, gt_instance, training_mask, gt_kernel_instance, gt_kernel, gt_kernel_inner, training_mask_distance = \
  226. imgs[0], imgs[1], imgs[2], imgs[3], imgs[4], imgs[5], imgs[6]
  227. max_instance = np.max(gt_instance) # num bbox
  228. # make centripetal shift
  229. gt_distance = np.zeros((2, *img.shape[0:2]), dtype=np.float32)
  230. for i in range(1, max_instance + 1):
  231. # kernel_reference
  232. ind = (gt_kernel_inner == i)
  233. if np.sum(ind) == 0:
  234. training_mask[gt_instance == i] = 0
  235. training_mask_distance[gt_instance == i] = 0
  236. continue
  237. kpoints = np.array(np.where(ind)).transpose(
  238. (1, 0))[:, ::-1].astype('float32')
  239. ind = (gt_instance == i) * (gt_kernel_instance == 0)
  240. if np.sum(ind) == 0:
  241. continue
  242. pixels = np.where(ind)
  243. points = np.array(pixels).transpose(
  244. (1, 0))[:, ::-1].astype('float32')
  245. bbox_ind = self.jaccard(points, kpoints)
  246. offset_gt = kpoints[bbox_ind] - points
  247. gt_distance[:, pixels[0], pixels[1]] = offset_gt.T * 0.1
  248. img = Image.fromarray(img)
  249. img = img.convert('RGB')
  250. data["image"] = img
  251. data["gt_kernel"] = gt_kernel.astype("int64")
  252. data["training_mask"] = training_mask.astype("int64")
  253. data["gt_instance"] = gt_instance.astype("int64")
  254. data["gt_kernel_instance"] = gt_kernel_instance.astype("int64")
  255. data["training_mask_distance"] = training_mask_distance.astype("int64")
  256. data["gt_distance"] = gt_distance.astype("float32")
  257. return data
  258. class ScaleAlignedShort():
  259. def __init__(self, short_size=640, **kwargs):
  260. self.short_size = short_size
  261. def __call__(self, data):
  262. img = data['image']
  263. org_img_shape = img.shape
  264. h, w = img.shape[0:2]
  265. scale = self.short_size * 1.0 / min(h, w)
  266. h = int(h * scale + 0.5)
  267. w = int(w * scale + 0.5)
  268. if h % 32 != 0:
  269. h = h + (32 - h % 32)
  270. if w % 32 != 0:
  271. w = w + (32 - w % 32)
  272. img = cv2.resize(img, dsize=(w, h))
  273. new_img_shape = img.shape
  274. img_shape = np.array(org_img_shape + new_img_shape)
  275. data['shape'] = img_shape
  276. data['image'] = img
  277. return data