# eval_with_label_end2end.py
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import copy
import json
import operator
import os
import re
import sys
from collections import defaultdict

import numpy as np
import shapely
import shapely.errors
from rapidfuzz.distance import Levenshtein
from shapely.geometry import Polygon
  26. def parse_ser_results_fp(fp, fp_type="gt", ignore_background=True):
  27. # img/zh_val_0.jpg {
  28. # "height": 3508,
  29. # "width": 2480,
  30. # "ocr_info": [
  31. # {"text": "Maribyrnong", "label": "other", "bbox": [1958, 144, 2184, 198]},
  32. # {"text": "CITYCOUNCIL", "label": "other", "bbox": [2052, 183, 2171, 214]},
  33. # ]
  34. assert fp_type in ["gt", "pred"]
  35. key = "label" if fp_type == "gt" else "pred"
  36. res_dict = dict()
  37. with open(fp, "r", encoding='utf-8') as fin:
  38. lines = fin.readlines()
  39. for _, line in enumerate(lines):
  40. img_path, info = line.strip().split("\t")
  41. # get key
  42. image_name = os.path.basename(img_path)
  43. res_dict[image_name] = []
  44. # get infos
  45. json_info = json.loads(info)
  46. for single_ocr_info in json_info["ocr_info"]:
  47. label = single_ocr_info[key].upper()
  48. if label in ["O", "OTHERS", "OTHER"]:
  49. label = "O"
  50. if ignore_background and label == "O":
  51. continue
  52. single_ocr_info["label"] = label
  53. res_dict[image_name].append(copy.deepcopy(single_ocr_info))
  54. return res_dict
  55. def polygon_from_str(polygon_points):
  56. """
  57. Create a shapely polygon object from gt or dt line.
  58. """
  59. polygon_points = np.array(polygon_points).reshape(4, 2)
  60. polygon = Polygon(polygon_points).convex_hull
  61. return polygon
  62. def polygon_iou(poly1, poly2):
  63. """
  64. Intersection over union between two shapely polygons.
  65. """
  66. if not poly1.intersects(
  67. poly2): # this test is fast and can accelerate calculation
  68. iou = 0
  69. else:
  70. try:
  71. inter_area = poly1.intersection(poly2).area
  72. union_area = poly1.area + poly2.area - inter_area
  73. iou = float(inter_area) / union_area
  74. except shapely.geos.TopologicalError:
  75. # except Exception as e:
  76. # print(e)
  77. print('shapely.geos.TopologicalError occurred, iou set to 0')
  78. iou = 0
  79. return iou
  80. def ed(args, str1, str2):
  81. if args.ignore_space:
  82. str1 = str1.replace(" ", "")
  83. str2 = str2.replace(" ", "")
  84. if args.ignore_case:
  85. str1 = str1.lower()
  86. str2 = str2.lower()
  87. return Levenshtein.distance(str1, str2)
  88. def convert_bbox_to_polygon(bbox):
  89. """
  90. bbox : [x1, y1, x2, y2]
  91. output: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
  92. """
  93. xmin, ymin, xmax, ymax = bbox
  94. poly = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
  95. return poly
  96. def eval_e2e(args):
  97. # gt
  98. gt_results = parse_ser_results_fp(args.gt_json_path, "gt",
  99. args.ignore_background)
  100. # pred
  101. dt_results = parse_ser_results_fp(args.pred_json_path, "pred",
  102. args.ignore_background)
  103. iou_thresh = args.iou_thres
  104. num_gt_chars = 0
  105. gt_count = 0
  106. dt_count = 0
  107. hit = 0
  108. ed_sum = 0
  109. for img_name in dt_results:
  110. gt_info = gt_results[img_name]
  111. gt_count += len(gt_info)
  112. dt_info = dt_results[img_name]
  113. dt_count += len(dt_info)
  114. dt_match = [False] * len(dt_info)
  115. gt_match = [False] * len(gt_info)
  116. all_ious = defaultdict(tuple)
  117. # gt: {text, label, bbox or poly}
  118. for index_gt, gt in enumerate(gt_info):
  119. if "poly" not in gt:
  120. gt["poly"] = convert_bbox_to_polygon(gt["bbox"])
  121. gt_poly = polygon_from_str(gt["poly"])
  122. for index_dt, dt in enumerate(dt_info):
  123. if "poly" not in dt:
  124. dt["poly"] = convert_bbox_to_polygon(dt["bbox"])
  125. dt_poly = polygon_from_str(dt["poly"])
  126. iou = polygon_iou(dt_poly, gt_poly)
  127. if iou >= iou_thresh:
  128. all_ious[(index_gt, index_dt)] = iou
  129. sorted_ious = sorted(
  130. all_ious.items(), key=operator.itemgetter(1), reverse=True)
  131. sorted_gt_dt_pairs = [item[0] for item in sorted_ious]
  132. # matched gt and dt
  133. for gt_dt_pair in sorted_gt_dt_pairs:
  134. index_gt, index_dt = gt_dt_pair
  135. if gt_match[index_gt] == False and dt_match[index_dt] == False:
  136. gt_match[index_gt] = True
  137. dt_match[index_dt] = True
  138. # ocr rec results
  139. gt_text = gt_info[index_gt]["text"]
  140. dt_text = dt_info[index_dt]["text"]
  141. # ser results
  142. gt_label = gt_info[index_gt]["label"]
  143. dt_label = dt_info[index_dt]["pred"]
  144. if True: # ignore_masks[index_gt] == '0':
  145. ed_sum += ed(args, gt_text, dt_text)
  146. num_gt_chars += len(gt_text)
  147. if gt_text == dt_text:
  148. if args.ignore_ser_prediction or gt_label == dt_label:
  149. hit += 1
  150. # unmatched dt
  151. for tindex, dt_match_flag in enumerate(dt_match):
  152. if dt_match_flag == False:
  153. dt_text = dt_info[tindex]["text"]
  154. gt_text = ""
  155. ed_sum += ed(args, dt_text, gt_text)
  156. # unmatched gt
  157. for tindex, gt_match_flag in enumerate(gt_match):
  158. if gt_match_flag == False:
  159. dt_text = ""
  160. gt_text = gt_info[tindex]["text"]
  161. ed_sum += ed(args, gt_text, dt_text)
  162. num_gt_chars += len(gt_text)
  163. eps = 1e-9
  164. print("config: ", args)
  165. print('hit, dt_count, gt_count', hit, dt_count, gt_count)
  166. precision = hit / (dt_count + eps)
  167. recall = hit / (gt_count + eps)
  168. fmeasure = 2.0 * precision * recall / (precision + recall + eps)
  169. avg_edit_dist_img = ed_sum / len(gt_results)
  170. avg_edit_dist_field = ed_sum / (gt_count + eps)
  171. character_acc = 1 - ed_sum / (num_gt_chars + eps)
  172. print('character_acc: %.2f' % (character_acc * 100) + "%")
  173. print('avg_edit_dist_field: %.2f' % (avg_edit_dist_field))
  174. print('avg_edit_dist_img: %.2f' % (avg_edit_dist_img))
  175. print('precision: %.2f' % (precision * 100) + "%")
  176. print('recall: %.2f' % (recall * 100) + "%")
  177. print('fmeasure: %.2f' % (fmeasure * 100) + "%")
  178. return
  179. def parse_args():
  180. """
  181. """
  182. def str2bool(v):
  183. return v.lower() in ("true", "t", "1")
  184. parser = argparse.ArgumentParser()
  185. ## Required parameters
  186. parser.add_argument(
  187. "--gt_json_path",
  188. default=None,
  189. type=str,
  190. required=True, )
  191. parser.add_argument(
  192. "--pred_json_path",
  193. default=None,
  194. type=str,
  195. required=True, )
  196. parser.add_argument("--iou_thres", default=0.5, type=float)
  197. parser.add_argument(
  198. "--ignore_case",
  199. default=False,
  200. type=str2bool,
  201. help="whether to do lower case for the strs")
  202. parser.add_argument(
  203. "--ignore_space",
  204. default=True,
  205. type=str2bool,
  206. help="whether to ignore space")
  207. parser.add_argument(
  208. "--ignore_background",
  209. default=True,
  210. type=str2bool,
  211. help="whether to ignore other label")
  212. parser.add_argument(
  213. "--ignore_ser_prediction",
  214. default=False,
  215. type=str2bool,
  216. help="whether to ignore ocr pred results")
  217. args = parser.parse_args()
  218. return args
  219. if __name__ == '__main__':
  220. args = parse_args()
  221. eval_e2e(args)