# SLANet_ch.yml
# SLANet table-structure recognition config using the table_structure_dict_ch.txt dictionary.
  # Global: run-wide settings (device, schedule, output paths) plus the
  # label-processing options that other sections reuse through YAML anchors.
  Global:
    use_gpu: True
    epoch_num: 400
    log_smooth_window: 20
    print_batch_step: 20
    save_model_dir: ./output/SLANet_ch
    save_epoch_step: 400
    # evaluation is run every 331 iterations after the 0th iteration
    eval_batch_step: [0, 331]
    cal_metric_during_train: True
    # Both keys below are intentionally left empty (they parse as null).
    pretrained_model:
    checkpoints:
    save_inference_dir: ./output/SLANet_ch/infer
    use_visualdl: False
    infer_img: ppstructure/docs/table/table.jpg
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
    character_type: en
    # Anchor reused by the SLAHead config and by the TableLabelEncode transforms.
    max_text_length: &max_text_length 500
    # Anchor reused by the Metric section and by the TableBoxEncode transforms.
    box_format: &box_format xyxyxyxy  # 'xywh', 'xyxy', 'xyxyxyxy'
    infer_mode: False
    use_sync_bn: True
    save_res_path: output/infer
  # Optimizer: Adam settings with nested learning-rate and regularizer blocks.
  # `lr` and `regularizer` are sub-mappings; `regularizer.name` must stay nested
  # so it does not collide with the optimizer's own `name` key.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      learning_rate: 0.001
    regularizer:
      name: 'L2'
      # A factor of zero: the L2 term is configured but carries no weight.
      factor: 0.00000
  # Architecture: SLANet table model, described as Backbone -> Neck -> Head.
  # Each component is its own sub-mapping with its own `name` key.
  Architecture:
    model_type: table
    algorithm: SLANet
    Backbone:
      name: PPLCNet
      scale: 1.0
      pretrained: True
      use_ssld: True
    Neck:
      name: CSPPAN
      out_channels: 96
    Head:
      name: SLAHead
      hidden_size: 256
      # Alias of the value anchored under Global.max_text_length.
      max_text_length: *max_text_length
      # Anchor reused by the Metric section and by the TableLabelEncode transforms.
      loc_reg_num: &loc_reg_num 8
  # Loss: SLALoss with separate weights for the structure and location terms.
  Loss:
    name: SLALoss
    structure_weight: 1.0
    loc_weight: 2.0
    loc_loss: smooth_l1
  # PostProcess: decoder applied to the model output.
  PostProcess:
    name: TableLabelDecode
    # Anchor reused by the TableLabelEncode transforms in Train and Eval.
    merge_no_span_structure: &merge_no_span_structure True
  # Metric: evaluation metric settings; `acc` is the main indicator.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: False
    # Alias of the value anchored in the SLAHead config (Head.loc_reg_num).
    loc_reg_num: *loc_reg_num
    # Alias of the value anchored under Global.box_format.
    box_format: *box_format
    del_thead_tbody: True
  # Train: training dataset, its ordered transform pipeline, and loader settings.
  # `transforms` is a sequence of single-key mappings; each key names a transform
  # and its value holds that transform's arguments (empty when it takes none).
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/train/
      label_file_list: [train_data/table/train.txt]
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: False
        - TableLabelEncode:
            learn_empty_box: False
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: False
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            # Input and output box formats are the same aliased value.
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Kept as the literal string `1./255.` (YAML does not read it as a number).
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Same 488 as ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage:
        - KeepKeys:
            keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
    loader:
      shuffle: True
      batch_size_per_card: 48
      drop_last: True
      num_workers: 1
  # Eval: validation dataset, transform pipeline, and loader settings.
  # The transform list has the same entries and arguments as the Train section;
  # only the data paths and the loader flags (no shuffle, no drop_last) differ.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/val/
      label_file_list: [train_data/table/val.txt]
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: False
        - TableLabelEncode:
            learn_empty_box: False
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: False
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            # Input and output box formats are the same aliased value.
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Kept as the literal string `1./255.` (YAML does not read it as a number).
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Same 488 as ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage:
        - KeepKeys:
            keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
    loader:
      shuffle: False
      drop_last: False
      batch_size_per_card: 48
      num_workers: 1