# det_r50_vd_sast_icdar15.yml
#
# SAST text-detection config: ResNet_SAST (50 layers) backbone, SASTFPN neck,
# SASTHead head, trained on a weighted mix of four label files and evaluated
# on the ICDAR2015 text-localization test labels.
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# flattened; the nesting below was reconstructed from the key names and the
# Train/Eval parallel structure. Confirm against the consuming framework's
# config schema before relying on it.
---
# Run-wide settings: device, schedule length, logging, checkpoint/output paths.
Global:
  use_gpu: true
  epoch_num: 5000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/sast_r50_vd_ic15/
  save_epoch_step: 1000
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [4000, 5000]
  cal_metric_during_train: false
  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
  # The next keys are intentionally unset (null).
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: null
  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt

# Model definition: each sub-section selects a component by `name`.
Architecture:
  model_type: det
  algorithm: SAST
  Transform: null  # no transform module configured
  Backbone:
    name: ResNet_SAST
    layers: 50
  Neck:
    name: SASTFPN
    with_cab: true
  Head:
    name: SASTHead

Loss:
  name: SASTLoss

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    # Scheduler options left disabled; only a fixed learning rate is set.
    # name: Cosine
    learning_rate: 0.001
    # warmup_epoch: 0
  regularizer:
    name: 'L2'
    factor: 0

PostProcess:
  name: SASTPostProcess
  score_thresh: 0.5
  sample_pts_num: 2
  nms_thresh: 0.2
  expand_scale: 1.0
  shrink_ratio_of_width: 0.3

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    # One label file per source dataset; ratio_list below has one entry per
    # file, in the same order.
    label_file_list:
      - ./train_data/icdar2013/train_label_json.txt
      - ./train_data/icdar2015/train_label_json.txt
      - ./train_data/icdar17_mlt_latin/train_label_json.txt
      - ./train_data/coco_text_icdar_4pts/train_label_json.txt
    ratio_list: [0.1, 0.45, 0.3, 0.15]
    # Transforms are listed in the order they appear in the pipeline.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null  # Class handling label
      - SASTProcessTrain:
          image_shape: [512, 512]
          min_crop_side_ratio: 0.3
          min_crop_size: 24
          min_text_size: 4
          max_text_size: 512
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys:
            - 'image'
            - 'score_map'
            - 'border_map'
            - 'training_mask'
            - 'tvo_map'
            - 'tco_map'
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 4
    num_workers: 4

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null  # Class handling label
      - DetResizeForTest:
          resize_long: 1536
      - NormalizeImage:
          # Kept as a string, not a number: `1./255.` is not a YAML float.
          # NOTE(review): presumably the consumer evaluates this expression;
          # confirm before replacing it with a literal.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1  # must be 1
    num_workers: 2