# SLANet.yml (3.6 KB)
  # Global: run-wide settings (device, schedule, logging, output paths, and
  # label-processing options). Two values are anchored here and aliased by
  # other sections of this file: &max_text_length and &box_format.
  Global:
    use_gpu: true
    epoch_num: 100
    log_smooth_window: 20
    print_batch_step: 20
    save_model_dir: ./output/SLANet
    # NOTE(review): 400 is larger than epoch_num (100) — confirm this
    # interval is intended.
    save_epoch_step: 400
    # evaluation is run every 1000 iterations after the 0th iteration
    eval_batch_step: [0, 1000]
    cal_metric_during_train: true
    # Explicit nulls: the original left both keys empty.
    pretrained_model: null
    checkpoints: null
    save_inference_dir: ./output/SLANet/infer
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict.txt
    character_type: en
    max_text_length: &max_text_length 500
    # Formats listed by the original author: 'xywh', 'xyxy', 'xyxyxyxy'
    box_format: &box_format 'xyxy'
    infer_mode: false
    use_sync_bn: true
    save_res_path: 'output/infer'
  # Optimizer: Adam with a Piecewise learning-rate schedule and an L2
  # regularizer entry.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      name: Piecewise
      learning_rate: 0.001
      # Two boundaries paired with three values.
      # NOTE(review): presumably values[i] applies up to decay_epochs[i] and
      # the last value afterwards — confirm against the scheduler.
      decay_epochs: [40, 50]
      values: [0.001, 0.0001, 0.00005]
    regularizer:
      name: 'L2'
      # A factor of zero — NOTE(review): presumably disables weight decay;
      # confirm.
      factor: 0.0
  # Architecture: model definition as Backbone -> Neck -> Head.
  # Head.loc_reg_num is anchored (&loc_reg_num) and aliased by other sections
  # of this file; Head.max_text_length aliases the anchor defined in Global.
  Architecture:
    model_type: table
    algorithm: SLANet
    Backbone:
      name: PPLCNet
      scale: 1.0
      pretrained: true
      use_ssld: true
    Neck:
      name: CSPPAN
      out_channels: 96
    Head:
      name: SLAHead
      hidden_size: 256
      max_text_length: *max_text_length
      loc_reg_num: &loc_reg_num 4
  # Loss: SLALoss with separate weights for the structure and location terms.
  Loss:
    name: SLALoss
    structure_weight: 1.0
    loc_weight: 2.0
    loc_loss: smooth_l1
  # PostProcess: decoder for model output. The merge_no_span_structure flag is
  # anchored here and aliased by the TableLabelEncode transforms in this file.
  PostProcess:
    name: TableLabelDecode
    merge_no_span_structure: &merge_no_span_structure true
  # Metric: evaluation metric settings. loc_reg_num and box_format are aliases
  # of anchors defined elsewhere in this file.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false
    loc_reg_num: *loc_reg_num
    box_format: *box_format
  # Train: training dataset, its ordered transform pipeline, and loader
  # settings.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/train/
      label_file_list:
        - train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl
      # Each list entry is a single-key mapping: operator name -> parameters.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: false
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Quoted on purpose: this is a string expression, not a YAML float.
            # NOTE(review): presumably evaluated by the consumer — confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        # No parameters; explicit null replaces the original bare key.
        - ToCHWImage: null
        - KeepKeys:
            keep_keys:
              - 'image'
              - 'structure'
              - 'bboxes'
              - 'bbox_masks'
              - 'shape'
    loader:
      shuffle: true
      batch_size_per_card: 48
      drop_last: true
      num_workers: 1
  # Eval: validation dataset, its ordered transform pipeline, and loader
  # settings (no shuffling, no dropped batches).
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/val/
      label_file_list:
        - train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl
      # Each list entry is a single-key mapping: operator name -> parameters.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: false
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Quoted on purpose: this is a string expression, not a YAML float.
            # NOTE(review): presumably evaluated by the consumer — confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        # No parameters; explicit null replaces the original bare key.
        - ToCHWImage: null
        - KeepKeys:
            keep_keys:
              - 'image'
              - 'structure'
              - 'bboxes'
              - 'bbox_masks'
              - 'shape'
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 48
      num_workers: 1
  136. num_workers: 1