sast_process.py 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. #Licensed under the Apache License, Version 2.0 (the "License");
  4. #you may not use this file except in compliance with the License.
  5. #You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. #Unless required by applicable law or agreed to in writing, software
  10. #distributed under the License is distributed on an "AS IS" BASIS,
  11. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. #See the License for the specific language governing permissions and
  13. #limitations under the License.
  14. """
  15. This part of the code is adapted from:
  16. https://github.com/songdejia/EAST/blob/master/data_utils.py
  17. """
  18. import math
  19. import cv2
  20. import numpy as np
  21. import json
  22. import sys
  23. import os
  24. __all__ = ['SASTProcessTrain']
  25. class SASTProcessTrain(object):
  26. def __init__(self,
  27. image_shape=[512, 512],
  28. min_crop_size=24,
  29. min_crop_side_ratio=0.3,
  30. min_text_size=10,
  31. max_text_size=512,
  32. **kwargs):
  33. self.input_size = image_shape[1]
  34. self.min_crop_size = min_crop_size
  35. self.min_crop_side_ratio = min_crop_side_ratio
  36. self.min_text_size = min_text_size
  37. self.max_text_size = max_text_size
  38. def quad_area(self, poly):
  39. """
  40. compute area of a polygon
  41. :param poly:
  42. :return:
  43. """
  44. edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
  45. (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
  46. (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
  47. (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
  48. return np.sum(edge) / 2.
  49. def gen_quad_from_poly(self, poly):
  50. """
  51. Generate min area quad from poly.
  52. """
  53. point_num = poly.shape[0]
  54. min_area_quad = np.zeros((4, 2), dtype=np.float32)
  55. if True:
  56. rect = cv2.minAreaRect(poly.astype(
  57. np.int32)) # (center (x,y), (width, height), angle of rotation)
  58. center_point = rect[0]
  59. box = np.array(cv2.boxPoints(rect))
  60. first_point_idx = 0
  61. min_dist = 1e4
  62. for i in range(4):
  63. dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \
  64. np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \
  65. np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \
  66. np.linalg.norm(box[(i + 3) % 4] - poly[-1])
  67. if dist < min_dist:
  68. min_dist = dist
  69. first_point_idx = i
  70. for i in range(4):
  71. min_area_quad[i] = box[(first_point_idx + i) % 4]
  72. return min_area_quad
  73. def check_and_validate_polys(self, polys, tags, xxx_todo_changeme):
  74. """
  75. check so that the text poly is in the same direction,
  76. and also filter some invalid polygons
  77. :param polys:
  78. :param tags:
  79. :return:
  80. """
  81. (h, w) = xxx_todo_changeme
  82. if polys.shape[0] == 0:
  83. return polys, np.array([]), np.array([])
  84. polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
  85. polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
  86. validated_polys = []
  87. validated_tags = []
  88. hv_tags = []
  89. for poly, tag in zip(polys, tags):
  90. quad = self.gen_quad_from_poly(poly)
  91. p_area = self.quad_area(quad)
  92. if abs(p_area) < 1:
  93. print('invalid poly')
  94. continue
  95. if p_area > 0:
  96. if tag == False:
  97. print('poly in wrong direction')
  98. tag = True # reversed cases should be ignore
  99. poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
  100. 1), :]
  101. quad = quad[(0, 3, 2, 1), :]
  102. len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] -
  103. quad[2])
  104. len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] -
  105. quad[2])
  106. hv_tag = 1
  107. if len_w * 2.0 < len_h:
  108. hv_tag = 0
  109. validated_polys.append(poly)
  110. validated_tags.append(tag)
  111. hv_tags.append(hv_tag)
  112. return np.array(validated_polys), np.array(validated_tags), np.array(
  113. hv_tags)
  114. def crop_area(self,
  115. im,
  116. polys,
  117. tags,
  118. hv_tags,
  119. crop_background=False,
  120. max_tries=25):
  121. """
  122. make random crop from the input image
  123. :param im:
  124. :param polys:
  125. :param tags:
  126. :param crop_background:
  127. :param max_tries: 50 -> 25
  128. :return:
  129. """
  130. h, w, _ = im.shape
  131. pad_h = h // 10
  132. pad_w = w // 10
  133. h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
  134. w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
  135. for poly in polys:
  136. poly = np.round(poly, decimals=0).astype(np.int32)
  137. minx = np.min(poly[:, 0])
  138. maxx = np.max(poly[:, 0])
  139. w_array[minx + pad_w:maxx + pad_w] = 1
  140. miny = np.min(poly[:, 1])
  141. maxy = np.max(poly[:, 1])
  142. h_array[miny + pad_h:maxy + pad_h] = 1
  143. # ensure the cropped area not across a text
  144. h_axis = np.where(h_array == 0)[0]
  145. w_axis = np.where(w_array == 0)[0]
  146. if len(h_axis) == 0 or len(w_axis) == 0:
  147. return im, polys, tags, hv_tags
  148. for i in range(max_tries):
  149. xx = np.random.choice(w_axis, size=2)
  150. xmin = np.min(xx) - pad_w
  151. xmax = np.max(xx) - pad_w
  152. xmin = np.clip(xmin, 0, w - 1)
  153. xmax = np.clip(xmax, 0, w - 1)
  154. yy = np.random.choice(h_axis, size=2)
  155. ymin = np.min(yy) - pad_h
  156. ymax = np.max(yy) - pad_h
  157. ymin = np.clip(ymin, 0, h - 1)
  158. ymax = np.clip(ymax, 0, h - 1)
  159. # if xmax - xmin < ARGS.min_crop_side_ratio * w or \
  160. # ymax - ymin < ARGS.min_crop_side_ratio * h:
  161. if xmax - xmin < self.min_crop_size or \
  162. ymax - ymin < self.min_crop_size:
  163. # area too small
  164. continue
  165. if polys.shape[0] != 0:
  166. poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
  167. & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
  168. selected_polys = np.where(
  169. np.sum(poly_axis_in_area, axis=1) == 4)[0]
  170. else:
  171. selected_polys = []
  172. if len(selected_polys) == 0:
  173. # no text in this area
  174. if crop_background:
  175. return im[ymin : ymax + 1, xmin : xmax + 1, :], \
  176. polys[selected_polys], tags[selected_polys], hv_tags[selected_polys]
  177. else:
  178. continue
  179. im = im[ymin:ymax + 1, xmin:xmax + 1, :]
  180. polys = polys[selected_polys]
  181. tags = tags[selected_polys]
  182. hv_tags = hv_tags[selected_polys]
  183. polys[:, :, 0] -= xmin
  184. polys[:, :, 1] -= ymin
  185. return im, polys, tags, hv_tags
  186. return im, polys, tags, hv_tags
  187. def generate_direction_map(self, poly_quads, direction_map):
  188. """
  189. """
  190. width_list = []
  191. height_list = []
  192. for quad in poly_quads:
  193. quad_w = (np.linalg.norm(quad[0] - quad[1]) +
  194. np.linalg.norm(quad[2] - quad[3])) / 2.0
  195. quad_h = (np.linalg.norm(quad[0] - quad[3]) +
  196. np.linalg.norm(quad[2] - quad[1])) / 2.0
  197. width_list.append(quad_w)
  198. height_list.append(quad_h)
  199. norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0)
  200. average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0)
  201. for quad in poly_quads:
  202. direct_vector_full = (
  203. (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0
  204. direct_vector = direct_vector_full / (
  205. np.linalg.norm(direct_vector_full) + 1e-6) * norm_width
  206. direction_label = tuple(
  207. map(float, [
  208. direct_vector[0], direct_vector[1], 1.0 / (average_height +
  209. 1e-6)
  210. ]))
  211. cv2.fillPoly(direction_map,
  212. quad.round().astype(np.int32)[np.newaxis, :, :],
  213. direction_label)
  214. return direction_map
  215. def calculate_average_height(self, poly_quads):
  216. """
  217. """
  218. height_list = []
  219. for quad in poly_quads:
  220. quad_h = (np.linalg.norm(quad[0] - quad[3]) +
  221. np.linalg.norm(quad[2] - quad[1])) / 2.0
  222. height_list.append(quad_h)
  223. average_height = max(sum(height_list) / len(height_list), 1.0)
  224. return average_height
  225. def generate_tcl_label(self,
  226. hw,
  227. polys,
  228. tags,
  229. ds_ratio,
  230. tcl_ratio=0.3,
  231. shrink_ratio_of_width=0.15):
  232. """
  233. Generate polygon.
  234. """
  235. h, w = hw
  236. h, w = int(h * ds_ratio), int(w * ds_ratio)
  237. polys = polys * ds_ratio
  238. score_map = np.zeros(
  239. (
  240. h,
  241. w, ), dtype=np.float32)
  242. tbo_map = np.zeros((h, w, 5), dtype=np.float32)
  243. training_mask = np.ones(
  244. (
  245. h,
  246. w, ), dtype=np.float32)
  247. direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape(
  248. [1, 1, 3]).astype(np.float32)
  249. for poly_idx, poly_tag in enumerate(zip(polys, tags)):
  250. poly = poly_tag[0]
  251. tag = poly_tag[1]
  252. # generate min_area_quad
  253. min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
  254. min_area_quad_h = 0.5 * (
  255. np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
  256. np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
  257. min_area_quad_w = 0.5 * (
  258. np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
  259. np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
  260. if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \
  261. or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio:
  262. continue
  263. if tag:
  264. # continue
  265. cv2.fillPoly(training_mask,
  266. poly.astype(np.int32)[np.newaxis, :, :], 0.15)
  267. else:
  268. tcl_poly = self.poly2tcl(poly, tcl_ratio)
  269. tcl_quads = self.poly2quads(tcl_poly)
  270. poly_quads = self.poly2quads(poly)
  271. # stcl map
  272. stcl_quads, quad_index = self.shrink_poly_along_width(
  273. tcl_quads,
  274. shrink_ratio_of_width=shrink_ratio_of_width,
  275. expand_height_ratio=1.0 / tcl_ratio)
  276. # generate tcl map
  277. cv2.fillPoly(score_map,
  278. np.round(stcl_quads).astype(np.int32), 1.0)
  279. # generate tbo map
  280. for idx, quad in enumerate(stcl_quads):
  281. quad_mask = np.zeros((h, w), dtype=np.float32)
  282. quad_mask = cv2.fillPoly(
  283. quad_mask,
  284. np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0)
  285. tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]],
  286. quad_mask, tbo_map)
  287. return score_map, tbo_map, training_mask
  288. def generate_tvo_and_tco(self,
  289. hw,
  290. polys,
  291. tags,
  292. tcl_ratio=0.3,
  293. ds_ratio=0.25):
  294. """
  295. Generate tcl map, tvo map and tbo map.
  296. """
  297. h, w = hw
  298. h, w = int(h * ds_ratio), int(w * ds_ratio)
  299. polys = polys * ds_ratio
  300. poly_mask = np.zeros((h, w), dtype=np.float32)
  301. tvo_map = np.ones((9, h, w), dtype=np.float32)
  302. tvo_map[0:-1:2] = np.tile(np.arange(0, w), (h, 1))
  303. tvo_map[1:-1:2] = np.tile(np.arange(0, w), (h, 1)).T
  304. poly_tv_xy_map = np.zeros((8, h, w), dtype=np.float32)
  305. # tco map
  306. tco_map = np.ones((3, h, w), dtype=np.float32)
  307. tco_map[0] = np.tile(np.arange(0, w), (h, 1))
  308. tco_map[1] = np.tile(np.arange(0, w), (h, 1)).T
  309. poly_tc_xy_map = np.zeros((2, h, w), dtype=np.float32)
  310. poly_short_edge_map = np.ones((h, w), dtype=np.float32)
  311. for poly, poly_tag in zip(polys, tags):
  312. if poly_tag == True:
  313. continue
  314. # adjust point order for vertical poly
  315. poly = self.adjust_point(poly)
  316. # generate min_area_quad
  317. min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
  318. min_area_quad_h = 0.5 * (
  319. np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
  320. np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
  321. min_area_quad_w = 0.5 * (
  322. np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
  323. np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
  324. # generate tcl map and text, 128 * 128
  325. tcl_poly = self.poly2tcl(poly, tcl_ratio)
  326. # generate poly_tv_xy_map
  327. for idx in range(4):
  328. cv2.fillPoly(
  329. poly_tv_xy_map[2 * idx],
  330. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  331. float(min(max(min_area_quad[idx, 0], 0), w)))
  332. cv2.fillPoly(
  333. poly_tv_xy_map[2 * idx + 1],
  334. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  335. float(min(max(min_area_quad[idx, 1], 0), h)))
  336. # generate poly_tc_xy_map
  337. for idx in range(2):
  338. cv2.fillPoly(
  339. poly_tc_xy_map[idx],
  340. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  341. float(center_point[idx]))
  342. # generate poly_short_edge_map
  343. cv2.fillPoly(
  344. poly_short_edge_map,
  345. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  346. float(max(min(min_area_quad_h, min_area_quad_w), 1.0)))
  347. # generate poly_mask and training_mask
  348. cv2.fillPoly(poly_mask,
  349. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  350. 1)
  351. tvo_map *= poly_mask
  352. tvo_map[:8] -= poly_tv_xy_map
  353. tvo_map[-1] /= poly_short_edge_map
  354. tvo_map = tvo_map.transpose((1, 2, 0))
  355. tco_map *= poly_mask
  356. tco_map[:2] -= poly_tc_xy_map
  357. tco_map[-1] /= poly_short_edge_map
  358. tco_map = tco_map.transpose((1, 2, 0))
  359. return tvo_map, tco_map
  360. def adjust_point(self, poly):
  361. """
  362. adjust point order.
  363. """
  364. point_num = poly.shape[0]
  365. if point_num == 4:
  366. len_1 = np.linalg.norm(poly[0] - poly[1])
  367. len_2 = np.linalg.norm(poly[1] - poly[2])
  368. len_3 = np.linalg.norm(poly[2] - poly[3])
  369. len_4 = np.linalg.norm(poly[3] - poly[0])
  370. if (len_1 + len_3) * 1.5 < (len_2 + len_4):
  371. poly = poly[[1, 2, 3, 0], :]
  372. elif point_num > 4:
  373. vector_1 = poly[0] - poly[1]
  374. vector_2 = poly[1] - poly[2]
  375. cos_theta = np.dot(vector_1, vector_2) / (
  376. np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6)
  377. theta = np.arccos(np.round(cos_theta, decimals=4))
  378. if abs(theta) > (70 / 180 * math.pi):
  379. index = list(range(1, point_num)) + [0]
  380. poly = poly[np.array(index), :]
  381. return poly
  382. def gen_min_area_quad_from_poly(self, poly):
  383. """
  384. Generate min area quad from poly.
  385. """
  386. point_num = poly.shape[0]
  387. min_area_quad = np.zeros((4, 2), dtype=np.float32)
  388. if point_num == 4:
  389. min_area_quad = poly
  390. center_point = np.sum(poly, axis=0) / 4
  391. else:
  392. rect = cv2.minAreaRect(poly.astype(
  393. np.int32)) # (center (x,y), (width, height), angle of rotation)
  394. center_point = rect[0]
  395. box = np.array(cv2.boxPoints(rect))
  396. first_point_idx = 0
  397. min_dist = 1e4
  398. for i in range(4):
  399. dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \
  400. np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \
  401. np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \
  402. np.linalg.norm(box[(i + 3) % 4] - poly[-1])
  403. if dist < min_dist:
  404. min_dist = dist
  405. first_point_idx = i
  406. for i in range(4):
  407. min_area_quad[i] = box[(first_point_idx + i) % 4]
  408. return min_area_quad, center_point
  409. def shrink_quad_along_width(self,
  410. quad,
  411. begin_width_ratio=0.,
  412. end_width_ratio=1.):
  413. """
  414. Generate shrink_quad_along_width.
  415. """
  416. ratio_pair = np.array(
  417. [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
  418. p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
  419. p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
  420. return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])
  421. def shrink_poly_along_width(self,
  422. quads,
  423. shrink_ratio_of_width,
  424. expand_height_ratio=1.0):
  425. """
  426. shrink poly with given length.
  427. """
  428. upper_edge_list = []
  429. def get_cut_info(edge_len_list, cut_len):
  430. for idx, edge_len in enumerate(edge_len_list):
  431. cut_len -= edge_len
  432. if cut_len <= 0.000001:
  433. ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx]
  434. return idx, ratio
  435. for quad in quads:
  436. upper_edge_len = np.linalg.norm(quad[0] - quad[1])
  437. upper_edge_list.append(upper_edge_len)
  438. # length of left edge and right edge.
  439. left_length = np.linalg.norm(quads[0][0] - quads[0][
  440. 3]) * expand_height_ratio
  441. right_length = np.linalg.norm(quads[-1][1] - quads[-1][
  442. 2]) * expand_height_ratio
  443. shrink_length = min(left_length, right_length,
  444. sum(upper_edge_list)) * shrink_ratio_of_width
  445. # shrinking length
  446. upper_len_left = shrink_length
  447. upper_len_right = sum(upper_edge_list) - shrink_length
  448. left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left)
  449. left_quad = self.shrink_quad_along_width(
  450. quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1)
  451. right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right)
  452. right_quad = self.shrink_quad_along_width(
  453. quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio)
  454. out_quad_list = []
  455. if left_idx == right_idx:
  456. out_quad_list.append(
  457. [left_quad[0], right_quad[1], right_quad[2], left_quad[3]])
  458. else:
  459. out_quad_list.append(left_quad)
  460. for idx in range(left_idx + 1, right_idx):
  461. out_quad_list.append(quads[idx])
  462. out_quad_list.append(right_quad)
  463. return np.array(out_quad_list), list(range(left_idx, right_idx + 1))
  464. def vector_angle(self, A, B):
  465. """
  466. Calculate the angle between vector AB and x-axis positive direction.
  467. """
  468. AB = np.array([B[1] - A[1], B[0] - A[0]])
  469. return np.arctan2(*AB)
  470. def theta_line_cross_point(self, theta, point):
  471. """
  472. Calculate the line through given point and angle in ax + by + c =0 form.
  473. """
  474. x, y = point
  475. cos = np.cos(theta)
  476. sin = np.sin(theta)
  477. return [sin, -cos, cos * y - sin * x]
  478. def line_cross_two_point(self, A, B):
  479. """
  480. Calculate the line through given point A and B in ax + by + c =0 form.
  481. """
  482. angle = self.vector_angle(A, B)
  483. return self.theta_line_cross_point(angle, A)
  484. def average_angle(self, poly):
  485. """
  486. Calculate the average angle between left and right edge in given poly.
  487. """
  488. p0, p1, p2, p3 = poly
  489. angle30 = self.vector_angle(p3, p0)
  490. angle21 = self.vector_angle(p2, p1)
  491. return (angle30 + angle21) / 2
  492. def line_cross_point(self, line1, line2):
  493. """
  494. line1 and line2 in 0=ax+by+c form, compute the cross point of line1 and line2
  495. """
  496. a1, b1, c1 = line1
  497. a2, b2, c2 = line2
  498. d = a1 * b2 - a2 * b1
  499. if d == 0:
  500. #print("line1", line1)
  501. #print("line2", line2)
  502. print('Cross point does not exist')
  503. return np.array([0, 0], dtype=np.float32)
  504. else:
  505. x = (b1 * c2 - b2 * c1) / d
  506. y = (a2 * c1 - a1 * c2) / d
  507. return np.array([x, y], dtype=np.float32)
  508. def quad2tcl(self, poly, ratio):
  509. """
  510. Generate center line by poly clock-wise point. (4, 2)
  511. """
  512. ratio_pair = np.array(
  513. [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
  514. p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair
  515. p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair
  516. return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]])
  517. def poly2tcl(self, poly, ratio):
  518. """
  519. Generate center line by poly clock-wise point.
  520. """
  521. ratio_pair = np.array(
  522. [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
  523. tcl_poly = np.zeros_like(poly)
  524. point_num = poly.shape[0]
  525. for idx in range(point_num // 2):
  526. point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx]
  527. ) * ratio_pair
  528. tcl_poly[idx] = point_pair[0]
  529. tcl_poly[point_num - 1 - idx] = point_pair[1]
  530. return tcl_poly
  531. def gen_quad_tbo(self, quad, tcl_mask, tbo_map):
  532. """
  533. Generate tbo_map for give quad.
  534. """
  535. # upper and lower line function: ax + by + c = 0;
  536. up_line = self.line_cross_two_point(quad[0], quad[1])
  537. lower_line = self.line_cross_two_point(quad[3], quad[2])
  538. quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) +
  539. np.linalg.norm(quad[1] - quad[2]))
  540. quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) +
  541. np.linalg.norm(quad[2] - quad[3]))
  542. # average angle of left and right line.
  543. angle = self.average_angle(quad)
  544. xy_in_poly = np.argwhere(tcl_mask == 1)
  545. for y, x in xy_in_poly:
  546. point = (x, y)
  547. line = self.theta_line_cross_point(angle, point)
  548. cross_point_upper = self.line_cross_point(up_line, line)
  549. cross_point_lower = self.line_cross_point(lower_line, line)
  550. ##FIX, offset reverse
  551. upper_offset_x, upper_offset_y = cross_point_upper - point
  552. lower_offset_x, lower_offset_y = cross_point_lower - point
  553. tbo_map[y, x, 0] = upper_offset_y
  554. tbo_map[y, x, 1] = upper_offset_x
  555. tbo_map[y, x, 2] = lower_offset_y
  556. tbo_map[y, x, 3] = lower_offset_x
  557. tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2
  558. return tbo_map
  559. def poly2quads(self, poly):
  560. """
  561. Split poly into quads.
  562. """
  563. quad_list = []
  564. point_num = poly.shape[0]
  565. # point pair
  566. point_pair_list = []
  567. for idx in range(point_num // 2):
  568. point_pair = [poly[idx], poly[point_num - 1 - idx]]
  569. point_pair_list.append(point_pair)
  570. quad_num = point_num // 2 - 1
  571. for idx in range(quad_num):
  572. # reshape and adjust to clock-wise
  573. quad_list.append((np.array(point_pair_list)[[idx, idx + 1]]
  574. ).reshape(4, 2)[[0, 2, 3, 1]])
  575. return np.array(quad_list)
  576. def __call__(self, data):
  577. im = data['image']
  578. text_polys = data['polys']
  579. text_tags = data['ignore_tags']
  580. if im is None:
  581. return None
  582. if text_polys.shape[0] == 0:
  583. return None
  584. h, w, _ = im.shape
  585. text_polys, text_tags, hv_tags = self.check_and_validate_polys(
  586. text_polys, text_tags, (h, w))
  587. if text_polys.shape[0] == 0:
  588. return None
  589. #set aspect ratio and keep area fix
  590. asp_scales = np.arange(1.0, 1.55, 0.1)
  591. asp_scale = np.random.choice(asp_scales)
  592. if np.random.rand() < 0.5:
  593. asp_scale = 1.0 / asp_scale
  594. asp_scale = math.sqrt(asp_scale)
  595. asp_wx = asp_scale
  596. asp_hy = 1.0 / asp_scale
  597. im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy)
  598. text_polys[:, :, 0] *= asp_wx
  599. text_polys[:, :, 1] *= asp_hy
  600. h, w, _ = im.shape
  601. if max(h, w) > 2048:
  602. rd_scale = 2048.0 / max(h, w)
  603. im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
  604. text_polys *= rd_scale
  605. h, w, _ = im.shape
  606. if min(h, w) < 16:
  607. return None
  608. #no background
  609. im, text_polys, text_tags, hv_tags = self.crop_area(im, \
  610. text_polys, text_tags, hv_tags, crop_background=False)
  611. if text_polys.shape[0] == 0:
  612. return None
  613. #continue for all ignore case
  614. if np.sum((text_tags * 1.0)) >= text_tags.size:
  615. return None
  616. new_h, new_w, _ = im.shape
  617. if (new_h is None) or (new_w is None):
  618. return None
  619. #resize image
  620. std_ratio = float(self.input_size) / max(new_w, new_h)
  621. rand_scales = np.array(
  622. [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0])
  623. rz_scale = std_ratio * np.random.choice(rand_scales)
  624. im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale)
  625. text_polys[:, :, 0] *= rz_scale
  626. text_polys[:, :, 1] *= rz_scale
  627. #add gaussian blur
  628. if np.random.rand() < 0.1 * 0.5:
  629. ks = np.random.permutation(5)[0] + 1
  630. ks = int(ks / 2) * 2 + 1
  631. im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0)
  632. #add brighter
  633. if np.random.rand() < 0.1 * 0.5:
  634. im = im * (1.0 + np.random.rand() * 0.5)
  635. im = np.clip(im, 0.0, 255.0)
  636. #add darker
  637. if np.random.rand() < 0.1 * 0.5:
  638. im = im * (1.0 - np.random.rand() * 0.5)
  639. im = np.clip(im, 0.0, 255.0)
  640. # Padding the im to [input_size, input_size]
  641. new_h, new_w, _ = im.shape
  642. if min(new_w, new_h) < self.input_size * 0.5:
  643. return None
  644. im_padded = np.ones(
  645. (self.input_size, self.input_size, 3), dtype=np.float32)
  646. im_padded[:, :, 2] = 0.485 * 255
  647. im_padded[:, :, 1] = 0.456 * 255
  648. im_padded[:, :, 0] = 0.406 * 255
  649. # Random the start position
  650. del_h = self.input_size - new_h
  651. del_w = self.input_size - new_w
  652. sh, sw = 0, 0
  653. if del_h > 1:
  654. sh = int(np.random.rand() * del_h)
  655. if del_w > 1:
  656. sw = int(np.random.rand() * del_w)
  657. # Padding
  658. im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy()
  659. text_polys[:, :, 0] += sw
  660. text_polys[:, :, 1] += sh
  661. score_map, border_map, training_mask = self.generate_tcl_label(
  662. (self.input_size, self.input_size), text_polys, text_tags, 0.25)
  663. # SAST head
  664. tvo_map, tco_map = self.generate_tvo_and_tco(
  665. (self.input_size, self.input_size),
  666. text_polys,
  667. text_tags,
  668. tcl_ratio=0.3,
  669. ds_ratio=0.25)
  670. # print("test--------tvo_map shape:", tvo_map.shape)
  671. im_padded[:, :, 2] -= 0.485 * 255
  672. im_padded[:, :, 1] -= 0.456 * 255
  673. im_padded[:, :, 0] -= 0.406 * 255
  674. im_padded[:, :, 2] /= (255.0 * 0.229)
  675. im_padded[:, :, 1] /= (255.0 * 0.224)
  676. im_padded[:, :, 0] /= (255.0 * 0.225)
  677. im_padded = im_padded.transpose((2, 0, 1))
  678. data['image'] = im_padded[::-1, :, :]
  679. data['score_map'] = score_map[np.newaxis, :, :]
  680. data['border_map'] = border_map.transpose((2, 0, 1))
  681. data['training_mask'] = training_mask[np.newaxis, :, :]
  682. data['tvo_map'] = tvo_map.transpose((2, 0, 1))
  683. data['tco_map'] = tco_map.transpose((2, 0, 1))
  684. return data