predict_system.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import sys
  16. import subprocess
  17. __dir__ = os.path.dirname(os.path.abspath(__file__))
  18. sys.path.append(__dir__)
  19. sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
  20. os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
  21. import cv2
  22. import copy
  23. import numpy as np
  24. import json
  25. import time
  26. import logging
  27. from PIL import Image
  28. import tools.infer.utility as utility
  29. import tools.infer.predict_rec as predict_rec
  30. import tools.infer.predict_det as predict_det
  31. import tools.infer.predict_cls as predict_cls
  32. from ppocr.utils.utility import get_image_file_list, check_and_read
  33. from ppocr.utils.logging import get_logger
  34. from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image, get_minarea_rect_crop
  35. logger = get_logger()
  36. class TextSystem(object):
  37. def __init__(self, args):
  38. if not args.show_log:
  39. logger.setLevel(logging.INFO)
  40. self.text_detector = predict_det.TextDetector(args)
  41. self.text_recognizer = predict_rec.TextRecognizer(args)
  42. self.use_angle_cls = args.use_angle_cls
  43. self.drop_score = args.drop_score
  44. if self.use_angle_cls:
  45. self.text_classifier = predict_cls.TextClassifier(args)
  46. self.args = args
  47. self.crop_image_res_index = 0
  48. def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res):
  49. os.makedirs(output_dir, exist_ok=True)
  50. bbox_num = len(img_crop_list)
  51. for bno in range(bbox_num):
  52. cv2.imwrite(
  53. os.path.join(output_dir,
  54. f"mg_crop_{bno+self.crop_image_res_index}.jpg"),
  55. img_crop_list[bno])
  56. logger.debug(f"{bno}, {rec_res[bno]}")
  57. self.crop_image_res_index += bbox_num
  58. def __call__(self, img, cls=True):
  59. time_dict = {'det': 0, 'rec': 0, 'csl': 0, 'all': 0}
  60. start = time.time()
  61. ori_im = img.copy()
  62. dt_boxes, elapse = self.text_detector(img)
  63. time_dict['det'] = elapse
  64. logger.debug("dt_boxes num : {}, elapse : {}".format(
  65. len(dt_boxes), elapse))
  66. if dt_boxes is None:
  67. return None, None
  68. img_crop_list = []
  69. dt_boxes = sorted_boxes(dt_boxes)
  70. for bno in range(len(dt_boxes)):
  71. tmp_box = copy.deepcopy(dt_boxes[bno])
  72. if self.args.det_box_type == "quad":
  73. img_crop = get_rotate_crop_image(ori_im, tmp_box)
  74. else:
  75. img_crop = get_minarea_rect_crop(ori_im, tmp_box)
  76. img_crop_list.append(img_crop)
  77. if self.use_angle_cls and cls:
  78. img_crop_list, angle_list, elapse = self.text_classifier(
  79. img_crop_list)
  80. time_dict['cls'] = elapse
  81. logger.debug("cls num : {}, elapse : {}".format(
  82. len(img_crop_list), elapse))
  83. rec_res, elapse = self.text_recognizer(img_crop_list)
  84. time_dict['rec'] = elapse
  85. logger.debug("rec_res num : {}, elapse : {}".format(
  86. len(rec_res), elapse))
  87. if self.args.save_crop_res:
  88. self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
  89. rec_res)
  90. filter_boxes, filter_rec_res = [], []
  91. for box, rec_result in zip(dt_boxes, rec_res):
  92. text, score = rec_result
  93. if score >= self.drop_score:
  94. filter_boxes.append(box)
  95. filter_rec_res.append(rec_result)
  96. end = time.time()
  97. time_dict['all'] = end - start
  98. return filter_boxes, filter_rec_res, time_dict
  99. def sorted_boxes(dt_boxes):
  100. """
  101. Sort text boxes in order from top to bottom, left to right
  102. args:
  103. dt_boxes(array):detected text boxes with shape [4, 2]
  104. return:
  105. sorted boxes(array) with shape [4, 2]
  106. """
  107. num_boxes = dt_boxes.shape[0]
  108. sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
  109. _boxes = list(sorted_boxes)
  110. for i in range(num_boxes - 1):
  111. for j in range(i, -1, -1):
  112. if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
  113. (_boxes[j + 1][0][0] < _boxes[j][0][0]):
  114. tmp = _boxes[j]
  115. _boxes[j] = _boxes[j + 1]
  116. _boxes[j + 1] = tmp
  117. else:
  118. break
  119. return _boxes
  120. def main(args):
  121. image_file_list = get_image_file_list(args.image_dir)
  122. image_file_list = image_file_list[args.process_id::args.total_process_num]
  123. text_sys = TextSystem(args)
  124. is_visualize = True
  125. font_path = args.vis_font_path
  126. drop_score = args.drop_score
  127. draw_img_save_dir = args.draw_img_save_dir
  128. os.makedirs(draw_img_save_dir, exist_ok=True)
  129. save_results = []
  130. logger.info(
  131. "In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', "
  132. "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320"
  133. )
  134. # warm up 10 times
  135. if args.warmup:
  136. img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
  137. for i in range(10):
  138. res = text_sys(img)
  139. total_time = 0
  140. cpu_mem, gpu_mem, gpu_util = 0, 0, 0
  141. _st = time.time()
  142. count = 0
  143. for idx, image_file in enumerate(image_file_list):
  144. img, flag_gif, flag_pdf = check_and_read(image_file)
  145. if not flag_gif and not flag_pdf:
  146. img = cv2.imread(image_file)
  147. if not flag_pdf:
  148. if img is None:
  149. logger.debug("error in loading image:{}".format(image_file))
  150. continue
  151. imgs = [img]
  152. else:
  153. page_num = args.page_num
  154. if page_num > len(img) or page_num == 0:
  155. page_num = len(img)
  156. imgs = img[:page_num]
  157. for index, img in enumerate(imgs):
  158. starttime = time.time()
  159. dt_boxes, rec_res, time_dict = text_sys(img)
  160. elapse = time.time() - starttime
  161. total_time += elapse
  162. if len(imgs) > 1:
  163. logger.debug(
  164. str(idx) + '_' + str(index) + " Predict time of %s: %.3fs"
  165. % (image_file, elapse))
  166. else:
  167. logger.debug(
  168. str(idx) + " Predict time of %s: %.3fs" % (image_file,
  169. elapse))
  170. for text, score in rec_res:
  171. logger.debug("{}, {:.3f}".format(text, score))
  172. res = [{
  173. "transcription": rec_res[i][0],
  174. "points": np.array(dt_boxes[i]).astype(np.int32).tolist(),
  175. } for i in range(len(dt_boxes))]
  176. if len(imgs) > 1:
  177. save_pred = os.path.basename(image_file) + '_' + str(
  178. index) + "\t" + json.dumps(
  179. res, ensure_ascii=False) + "\n"
  180. else:
  181. save_pred = os.path.basename(image_file) + "\t" + json.dumps(
  182. res, ensure_ascii=False) + "\n"
  183. save_results.append(save_pred)
  184. if is_visualize:
  185. image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
  186. boxes = dt_boxes
  187. txts = [rec_res[i][0] for i in range(len(rec_res))]
  188. scores = [rec_res[i][1] for i in range(len(rec_res))]
  189. draw_img = draw_ocr_box_txt(
  190. image,
  191. boxes,
  192. txts,
  193. scores,
  194. drop_score=drop_score,
  195. font_path=font_path)
  196. if flag_gif:
  197. save_file = image_file[:-3] + "png"
  198. elif flag_pdf:
  199. save_file = image_file.replace('.pdf',
  200. '_' + str(index) + '.png')
  201. else:
  202. save_file = image_file
  203. cv2.imwrite(
  204. os.path.join(draw_img_save_dir,
  205. os.path.basename(save_file)),
  206. draw_img[:, :, ::-1])
  207. logger.debug("The visualized image saved in {}".format(
  208. os.path.join(draw_img_save_dir, os.path.basename(
  209. save_file))))
  210. logger.info("The predict total time is {}".format(time.time() - _st))
  211. if args.benchmark:
  212. text_sys.text_detector.autolog.report()
  213. text_sys.text_recognizer.autolog.report()
  214. with open(
  215. os.path.join(draw_img_save_dir, "system_results.txt"),
  216. 'w',
  217. encoding='utf-8') as f:
  218. f.writelines(save_results)
  219. if __name__ == "__main__":
  220. args = utility.parse_args()
  221. if args.use_mp:
  222. p_list = []
  223. total_process_num = args.total_process_num
  224. for process_id in range(total_process_num):
  225. cmd = [sys.executable, "-u"] + sys.argv + [
  226. "--process_id={}".format(process_id),
  227. "--use_mp={}".format(False)
  228. ]
  229. p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
  230. p_list.append(p)
  231. for p in p_list:
  232. p.wait()
  233. else:
  234. main(args)