# table_master.yml
  # Run-wide settings: device, schedule length, logging/eval cadence,
  # output locations and inference inputs.
  Global:
    use_gpu: true
    epoch_num: 17
    log_smooth_window: 20
    print_batch_step: 100
    save_model_dir: ./output/table_master/
    # Same value as epoch_num above.
    save_epoch_step: 17
    eval_batch_step: [0, 6259]
    cal_metric_during_train: true
    pretrained_model: null
    # No checkpoint path is set; written as an explicit null.
    checkpoints: null
    save_inference_dir: output/table_master/infer
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    save_res_path: ./output/table_master
    character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
    infer_mode: false
    # Anchored: aliased by Architecture.Head and by TableMasterLabelEncode
    # in the Train and Eval transforms.
    max_text_length: &max_text_length 500
    # Anchored: aliased by Metric and by TableBoxEncode in Train and Eval.
    box_format: &box_format 'xywh'  # 'xywh', 'xyxy', 'xyxyxyxy'
  # Optimizer settings: Adam, a MultiStepDecay learning-rate schedule,
  # and an L2 regularizer entry.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    lr:
      name: MultiStepDecay
      learning_rate: 0.001
      milestones: [12, 15]
      gamma: 0.1
      warmup_epoch: 0.02
    regularizer:
      name: L2
      # presumably a factor of 0.0 disables the penalty; confirm in the
      # optimizer builder.
      factor: 0.0
  # Model definition: a TableResNetExtra backbone feeding a TableMasterHead.
  Architecture:
    model_type: table
    algorithm: TableMaster
    Backbone:
      name: TableResNetExtra
      # gcb_config has its own `headers` and `layers` keys; they are distinct
      # from Head.headers and from Backbone.layers below.
      gcb_config:
        ratio: 0.0625
        headers: 1
        att_scale: false
        fusion_type: channel_add
        # Four flags, matching the four entries of Backbone.layers;
        # presumably one per stage (confirm against the backbone code).
        layers: [false, true, true, true]
      layers: [1, 2, 5, 3]
    Head:
      name: TableMasterHead
      hidden_size: 512
      headers: 8
      dropout: 0
      # NOTE(review): 2024 is unusual (2048 would be the conventional width).
      # Left as-is; confirm before changing, since trained weights may
      # depend on it.
      d_ff: 2024
      max_text_length: *max_text_length
      # Anchored: aliased by TableMasterLabelEncode in Train and Eval.
      loc_reg_num: &loc_reg_num 4
  # Training loss selection.
  Loss:
    name: TableMasterLoss
    ignore_index: 42  # set to len of dict + 3
  # Decoding of model output.
  PostProcess:
    name: TableMasterLabelDecode
    box_shape: pad
    # Anchored: aliased by TableMasterLabelEncode in Train and Eval so that
    # encoding and decoding use the same setting.
    merge_no_span_structure: &merge_no_span_structure true
  # Evaluation metric; `acc` is the main indicator.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false
    # Reuses the box format anchored in Global.
    box_format: *box_format
  # Training data: PubTabNet train split, the preprocessing pipeline applied
  # to each sample, and the data-loader settings.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/train/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
      # Operators are listed in the order they appear in the pipeline.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - TableMasterLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: true
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - ResizeTableImage:
            max_len: 480
            resize_bboxes: true
        # Pad size matches ResizeTableImage.max_len (480).
        - PaddingTableImage:
            size: [480, 480]
        # Input and output formats are the same alias.
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - NormalizeImage:
            # A string expression, not a number; quoted so it stays a string
            # under any parser. Presumably evaluated by the operator; confirm.
            scale: '1./255.'
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: [image, structure, bboxes, bbox_masks, shape]
    loader:
      shuffle: true
      batch_size_per_card: 10
      drop_last: true
      num_workers: 8
  # Evaluation data: PubTabNet val split. The transform pipeline has the same
  # operators and arguments as Train; the loader does not shuffle and keeps
  # the last partial batch.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/val/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
      # Operators are listed in the order they appear in the pipeline.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - TableMasterLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: true
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - ResizeTableImage:
            max_len: 480
            resize_bboxes: true
        # Pad size matches ResizeTableImage.max_len (480).
        - PaddingTableImage:
            size: [480, 480]
        # Input and output formats are the same alias.
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - NormalizeImage:
            # A string expression, not a number; quoted so it stays a string
            # under any parser. Presumably evaluated by the operator; confirm.
            scale: '1./255.'
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: [image, structure, bboxes, bbox_masks, shape]
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 10
      num_workers: 8