gen.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/zcswdt/Color_OCR_image_generator
  17. """
  18. import os
  19. import random
  20. from PIL import Image, ImageDraw, ImageFont
  21. import json
  22. import argparse
  23. def get_char_lines(txt_root_path):
  24. """
  25. desc:get corpus line
  26. """
  27. txt_files = os.listdir(txt_root_path)
  28. char_lines = []
  29. for txt in txt_files:
  30. f = open(os.path.join(txt_root_path, txt), mode='r', encoding='utf-8')
  31. lines = f.readlines()
  32. f.close()
  33. for line in lines:
  34. char_lines.append(line.strip())
  35. return char_lines
  36. def get_horizontal_text_picture(image_file, chars, fonts_list, cf):
  37. """
  38. desc:gen horizontal text picture
  39. """
  40. img = Image.open(image_file)
  41. if img.mode != 'RGB':
  42. img = img.convert('RGB')
  43. img_w, img_h = img.size
  44. # random choice font
  45. font_path = random.choice(fonts_list)
  46. # random choice font size
  47. font_size = random.randint(cf.font_min_size, cf.font_max_size)
  48. font = ImageFont.truetype(font_path, font_size)
  49. ch_w = []
  50. ch_h = []
  51. for ch in chars:
  52. wt, ht = font.getsize(ch)
  53. ch_w.append(wt)
  54. ch_h.append(ht)
  55. f_w = sum(ch_w)
  56. f_h = max(ch_h)
  57. # add space
  58. char_space_width = max(ch_w)
  59. f_w += (char_space_width * (len(chars) - 1))
  60. x1 = random.randint(0, img_w - f_w)
  61. y1 = random.randint(0, img_h - f_h)
  62. x2 = x1 + f_w
  63. y2 = y1 + f_h
  64. crop_y1 = y1
  65. crop_x1 = x1
  66. crop_y2 = y2
  67. crop_x2 = x2
  68. best_color = (0, 0, 0)
  69. draw = ImageDraw.Draw(img)
  70. for i, ch in enumerate(chars):
  71. draw.text((x1, y1), ch, best_color, font=font)
  72. x1 += (ch_w[i] + char_space_width)
  73. crop_img = img.crop((crop_x1, crop_y1, crop_x2, crop_y2))
  74. return crop_img, chars
  75. def get_vertical_text_picture(image_file, chars, fonts_list, cf):
  76. """
  77. desc:gen vertical text picture
  78. """
  79. img = Image.open(image_file)
  80. if img.mode != 'RGB':
  81. img = img.convert('RGB')
  82. img_w, img_h = img.size
  83. # random choice font
  84. font_path = random.choice(fonts_list)
  85. # random choice font size
  86. font_size = random.randint(cf.font_min_size, cf.font_max_size)
  87. font = ImageFont.truetype(font_path, font_size)
  88. ch_w = []
  89. ch_h = []
  90. for ch in chars:
  91. wt, ht = font.getsize(ch)
  92. ch_w.append(wt)
  93. ch_h.append(ht)
  94. f_w = max(ch_w)
  95. f_h = sum(ch_h)
  96. x1 = random.randint(0, img_w - f_w)
  97. y1 = random.randint(0, img_h - f_h)
  98. x2 = x1 + f_w
  99. y2 = y1 + f_h
  100. crop_y1 = y1
  101. crop_x1 = x1
  102. crop_y2 = y2
  103. crop_x2 = x2
  104. best_color = (0, 0, 0)
  105. draw = ImageDraw.Draw(img)
  106. i = 0
  107. for ch in chars:
  108. draw.text((x1, y1), ch, best_color, font=font)
  109. y1 = y1 + ch_h[i]
  110. i = i + 1
  111. crop_img = img.crop((crop_x1, crop_y1, crop_x2, crop_y2))
  112. crop_img = crop_img.transpose(Image.ROTATE_90)
  113. return crop_img, chars
  114. def get_fonts(fonts_path):
  115. """
  116. desc: get all fonts
  117. """
  118. font_files = os.listdir(fonts_path)
  119. fonts_list=[]
  120. for font_file in font_files:
  121. font_path=os.path.join(fonts_path, font_file)
  122. fonts_list.append(font_path)
  123. return fonts_list
  124. if __name__ == '__main__':
  125. parser = argparse.ArgumentParser()
  126. parser.add_argument('--num_img', type=int, default=30, help="Number of images to generate")
  127. parser.add_argument('--font_min_size', type=int, default=11)
  128. parser.add_argument('--font_max_size', type=int, default=12,
  129. help="Help adjust the size of the generated text and the size of the picture")
  130. parser.add_argument('--bg_path', type=str, default='./background',
  131. help='The generated text pictures will be pasted onto the pictures of this folder')
  132. parser.add_argument('--det_bg_path', type=str, default='./det_background',
  133. help='The generated text pictures will use the pictures of this folder as the background')
  134. parser.add_argument('--fonts_path', type=str, default='../../StyleText/fonts',
  135. help='The font used to generate the picture')
  136. parser.add_argument('--corpus_path', type=str, default='./corpus',
  137. help='The corpus used to generate the text picture')
  138. parser.add_argument('--output_dir', type=str, default='./output/', help='Images save dir')
  139. cf = parser.parse_args()
  140. # save path
  141. if not os.path.exists(cf.output_dir):
  142. os.mkdir(cf.output_dir)
  143. # get corpus
  144. txt_root_path = cf.corpus_path
  145. char_lines = get_char_lines(txt_root_path=txt_root_path)
  146. # get all fonts
  147. fonts_path = cf.fonts_path
  148. fonts_list = get_fonts(fonts_path)
  149. # rec bg
  150. img_root_path = cf.bg_path
  151. imnames=os.listdir(img_root_path)
  152. # det bg
  153. det_bg_path = cf.det_bg_path
  154. bg_pics = os.listdir(det_bg_path)
  155. # OCR det files
  156. det_val_file = open(cf.output_dir + 'det_gt_val.txt', 'w', encoding='utf-8')
  157. det_train_file = open(cf.output_dir + 'det_gt_train.txt', 'w', encoding='utf-8')
  158. # det imgs
  159. det_save_dir = 'imgs/'
  160. if not os.path.exists(cf.output_dir + det_save_dir):
  161. os.mkdir(cf.output_dir + det_save_dir)
  162. det_val_save_dir = 'imgs_val/'
  163. if not os.path.exists(cf.output_dir + det_val_save_dir):
  164. os.mkdir(cf.output_dir + det_val_save_dir)
  165. # OCR rec files
  166. rec_val_file = open(cf.output_dir + 'rec_gt_val.txt', 'w', encoding='utf-8')
  167. rec_train_file = open(cf.output_dir + 'rec_gt_train.txt', 'w', encoding='utf-8')
  168. # rec imgs
  169. rec_save_dir = 'rec_imgs/'
  170. if not os.path.exists(cf.output_dir + rec_save_dir):
  171. os.mkdir(cf.output_dir + rec_save_dir)
  172. rec_val_save_dir = 'rec_imgs_val/'
  173. if not os.path.exists(cf.output_dir + rec_val_save_dir):
  174. os.mkdir(cf.output_dir + rec_val_save_dir)
  175. val_ratio = cf.num_img * 0.2 # val dataset ratio
  176. print('start generating...')
  177. for i in range(0, cf.num_img):
  178. imname = random.choice(imnames)
  179. img_path = os.path.join(img_root_path, imname)
  180. rnd = random.random()
  181. # gen horizontal text picture
  182. if rnd < 0.5:
  183. gen_img, chars = get_horizontal_text_picture(img_path, char_lines[i], fonts_list, cf)
  184. ori_w, ori_h = gen_img.size
  185. gen_img = gen_img.crop((0, 3, ori_w, ori_h))
  186. # gen vertical text picture
  187. else:
  188. gen_img, chars = get_vertical_text_picture(img_path, char_lines[i], fonts_list, cf)
  189. ori_w, ori_h = gen_img.size
  190. gen_img = gen_img.crop((3, 0, ori_w, ori_h))
  191. ori_w, ori_h = gen_img.size
  192. # rec imgs
  193. save_img_name = str(i).zfill(4) + '.jpg'
  194. if i < val_ratio:
  195. save_dir = os.path.join(rec_val_save_dir, save_img_name)
  196. line = save_dir + '\t' + char_lines[i] + '\n'
  197. rec_val_file.write(line)
  198. else:
  199. save_dir = os.path.join(rec_save_dir, save_img_name)
  200. line = save_dir + '\t' + char_lines[i] + '\n'
  201. rec_train_file.write(line)
  202. gen_img.save(cf.output_dir + save_dir, quality = 95, subsampling=0)
  203. # det img
  204. # random choice bg
  205. bg_pic = random.sample(bg_pics, 1)[0]
  206. det_img = Image.open(os.path.join(det_bg_path, bg_pic))
  207. # the PCB position is fixed, modify it according to your own scenario
  208. if bg_pic == '1.png':
  209. x1 = 38
  210. y1 = 3
  211. else:
  212. x1 = 34
  213. y1 = 1
  214. det_img.paste(gen_img, (x1, y1))
  215. # text pos
  216. chars_pos = [[x1, y1], [x1 + ori_w, y1], [x1 + ori_w, y1 + ori_h], [x1, y1 + ori_h]]
  217. label = [{"transcription":char_lines[i], "points":chars_pos}]
  218. if i < val_ratio:
  219. save_dir = os.path.join(det_val_save_dir, save_img_name)
  220. det_val_file.write(save_dir + '\t' + json.dumps(
  221. label, ensure_ascii=False) + '\n')
  222. else:
  223. save_dir = os.path.join(det_save_dir, save_img_name)
  224. det_train_file.write(save_dir + '\t' + json.dumps(
  225. label, ensure_ascii=False) + '\n')
  226. det_img.save(cf.output_dir + save_dir, quality = 95, subsampling=0)