# det_r50_drrg_ctw.yml (3.3 KB)

# (File-viewer line-number gutter, 1-133, was fused onto this line; it is not part of the configuration.)
  # Global run settings: device, training length, logging cadence, and
  # input/output paths.
  # NOTE(review): the nesting here is reconstructed from a flattened listing
  # that had lost its indentation — confirm against the upstream file.
  Global:
    use_gpu: true
    epoch_num: 1200
    log_smooth_window: 20
    print_batch_step: 5
    save_model_dir: ./output/det_r50_drrg_ctw/
    save_epoch_step: 100
    # evaluation is run every 1260 iterations
    # NOTE(review): the first element (37800) is presumably the iteration at
    # which evaluation starts — confirm.
    eval_batch_step: [37800, 1260]
    cal_metric_during_train: false
    pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained.pdparams
    # Explicit null: same parsed value as the original bare `key:` form.
    checkpoints: null
    save_inference_dir: null
    use_visualdl: false
    infer_img: doc/imgs_en/img_10.jpg
    # NOTE(review): this writes under ./output/det_drrg/ while save_model_dir
    # uses ./output/det_r50_drrg_ctw/ — confirm the difference is intended.
    save_res_path: ./output/det_drrg/predicts_drrg.txt
  # Model definition: a detection model using the DRRG algorithm, described
  # as Backbone -> Neck -> Head components.
  # NOTE(review): the nesting here is reconstructed from a flattened listing
  # that had lost its indentation — confirm against the upstream file.
  Architecture:
    model_type: det
    algorithm: DRRG
    # No transform component is configured (explicit null, same parsed value
    # as the original bare `Transform:`).
    Transform: null
    Backbone:
      name: ResNet_vd
      layers: 50
    Neck:
      name: FPN_UNet
      in_channels: [256, 512, 1024, 2048]
      out_channels: 32
    Head:
      name: DRRGHead
      # Equals the Neck's out_channels value above.
      in_channels: 32
      text_region_thr: 0.3
      center_region_thr: 0.4
  # Loss selection; no further parameters are set in this section.
  Loss:
    name: DRRGLoss
  # Optimizer and learning-rate schedule settings.
  # NOTE(review): the nesting here is reconstructed from a flattened listing
  # that had lost its indentation — confirm against the upstream file.
  Optimizer:
    name: Momentum
    momentum: 0.9
    lr:
      name: DecayLearningRate
      learning_rate: 0.028
      epochs: 1200
      factor: 0.9
      end_lr: 0.0000001
    # NOTE(review): placed at the Optimizer level rather than under `lr`; the
    # flattened listing cannot distinguish the two — confirm.
    weight_decay: 0.0001
  # Post-processing step applied to the model output.
  PostProcess:
    name: DRRGPostprocess
    link_thr: 0.8
  # Evaluation metric and the indicator reported as the main score.
  Metric:
    name: DetFCEMetric
    main_indicator: hmean
  # Training data pipeline: dataset location, the ordered list of transforms
  # applied to each sample, and data-loader settings.
  # NOTE(review): the nesting here is reconstructed from a flattened listing
  # that had lost its indentation (each transform's parameters are assumed to
  # sit under that transform's key) — confirm against the upstream file.
  Train:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data/ctw1500/imgs/
      label_file_list:
        - ./train_data/ctw1500/imgs/training.txt
      # Each list item is a single-key mapping {TransformName: params};
      # `null` means no parameters are given for that transform.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
            ignore_orientation: true
        - DetLabelEncode: null  # Class handling label
        - ColorJitter:
            # 0.12549019607843137 == 32/255
            brightness: 0.12549019607843137
            saturation: 0.5
        - RandomScaling: null
        - RandomCropFlip:
            crop_ratio: 0.5
        - RandomCropPolyInstances:
            crop_ratio: 0.8
            min_side_ratio: 0.3
        - RandomRotatePolyInstances:
            rotate_ratio: 0.5
            max_angle: 60
            pad_with_fixed_color: false
        - SquareResizePad:
            target_size: 800
            pad_ratio: 0.6
        - IaaAugment:
            augmenter_args:
              - type: Fliplr
                args:
                  p: 0.5
        - DRRGTargets: null
        - NormalizeImage:
            # Quoted so it is unambiguously a string; the value is unchanged.
            # NOTE(review): presumably evaluated as an expression by the
            # consumer — confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - ToCHWImage: null
        - KeepKeys:
            # dataloader will return list in this order
            keep_keys:
              - 'image'
              - 'gt_text_mask'
              - 'gt_center_region_mask'
              - 'gt_mask'
              - 'gt_top_height_map'
              - 'gt_bot_height_map'
              - 'gt_sin_map'
              - 'gt_cos_map'
              - 'gt_comp_attribs'
    loader:
      shuffle: true
      drop_last: false
      batch_size_per_card: 4
      num_workers: 8
  # Evaluation data pipeline: dataset location, the ordered list of transforms
  # applied to each sample, and data-loader settings.
  # NOTE(review): the nesting here is reconstructed from a flattened listing
  # that had lost its indentation (each transform's parameters are assumed to
  # sit under that transform's key) — confirm against the upstream file.
  Eval:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data/ctw1500/imgs/
      label_file_list:
        - ./train_data/ctw1500/imgs/test.txt
      # Each list item is a single-key mapping {TransformName: params};
      # `null` means no parameters are given for that transform.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
            ignore_orientation: true
        - DetLabelEncode: null  # Class handling label
        - DetResizeForTest:
            limit_type: 'min'
            limit_side_len: 640
        - NormalizeImage:
            # Quoted so it is unambiguously a string; the value is unchanged.
            # NOTE(review): presumably evaluated as an expression by the
            # consumer — confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - Pad: null
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 1  # must be 1
      num_workers: 2
  126. num_workers: 2