# table_master.yml
  # Run-wide settings: device, schedule length, logging, and I/O paths.
  Global:
    use_gpu: true
    epoch_num: 17
    log_smooth_window: 20
    print_batch_step: 100
    save_model_dir: ./output/table_master/
    save_epoch_step: 17
    # NOTE(review): presumably [start_iter, interval] — confirm with the trainer.
    eval_batch_step: [0, 6259]
    cal_metric_during_train: true
    pretrained_model: null
    # Explicit null instead of a bare empty value; no checkpoint is set here.
    checkpoints: null
    save_inference_dir: output/table_master/infer
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    save_res_path: ./output/table_master
    character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
    infer_mode: false
    # Same value as Architecture.Head.max_text_length in this file.
    max_text_length: 500
  # Optimizer, learning-rate schedule, and weight regularization.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    # Learning-rate schedule; its own `name` is kept apart from the optimizer's.
    lr:
      name: MultiStepDecay
      learning_rate: 0.001
      milestones: [12, 15]
      gamma: 0.1
      warmup_epoch: 0.02
    regularizer:
      name: L2
      factor: 0.0
  # Model definition: backbone and head.
  Architecture:
    model_type: table
    algorithm: TableMaster
    Backbone:
      name: TableResNetExtra
      gcb_config:
        ratio: 0.0625
        headers: 1
        att_scale: false
        fusion_type: channel_add
        # Boolean list belongs to gcb_config; the integer list below belongs
        # to the backbone. Nesting them separately avoids a duplicate key.
        layers: [false, true, true, true]
      layers: [1, 2, 5, 3]
    Head:
      name: TableMasterHead
      hidden_size: 512
      headers: 8
      dropout: 0
      # NOTE(review): 2024 rather than the more common 2048 — confirm intended.
      d_ff: 2024
      # Same value as Global.max_text_length in this file.
      max_text_length: 500
  # Training loss.
  Loss:
    name: TableMasterLoss
    ignore_index: 42  # set to len of dict + 3
  # Decoding of model output.
  PostProcess:
    name: TableMasterLabelDecode
    box_shape: pad
  # Evaluation metric; `acc` is the main indicator.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false
  # Training data: dataset location, preprocessing pipeline, and loader options.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: ./train_data/pubtabnet/train
      label_file_list: [./train_data/pubtabnet/train.jsonl]
      # Ordered list of preprocessing ops; each entry maps an op name to its
      # parameters.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - TableMasterLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: true
            replace_empty_cell_token: true
        - ResizeTableImage:
            max_len: 480
            resize_bboxes: true
        - PaddingTableImage:
            size: [480, 480]
        - TableBoxEncode:
            box_format: 'xywh'
        - NormalizeImage:
            # Quoted on purpose: this is a string, not a YAML number.
            # NOTE(review): presumably evaluated by the consumer — confirm.
            scale: '1./255.'
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: [image, structure, bboxes, bbox_masks, shape]
    loader:
      shuffle: true
      batch_size_per_card: 10
      drop_last: true
      num_workers: 8
  # Evaluation data: same preprocessing pipeline as Train, with shuffling and
  # drop_last turned off.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: ./train_data/pubtabnet/test/
      label_file_list: [./train_data/pubtabnet/test.jsonl]
      # Ordered list of preprocessing ops; each entry maps an op name to its
      # parameters.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - TableMasterLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: true
            replace_empty_cell_token: true
        - ResizeTableImage:
            max_len: 480
            resize_bboxes: true
        - PaddingTableImage:
            size: [480, 480]
        - TableBoxEncode:
            box_format: 'xywh'
        - NormalizeImage:
            # Quoted on purpose: this is a string, not a YAML number.
            # NOTE(review): presumably evaluated by the consumer — confirm.
            scale: '1./255.'
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: [image, structure, bboxes, bbox_masks, shape]
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 10
      num_workers: 8