# table_mv3.yml
  # Global: run-wide settings.
  # NOTE(review): nesting was reconstructed from a numbered listing whose
  # indentation had been flattened; verify against the consumer's schema.
  Global:
    use_gpu: true
    epoch_num: 400
    log_smooth_window: 20
    print_batch_step: 5
    save_model_dir: ./output/table_mv3/
    save_epoch_step: 400
    # evaluation is run every 400 iterations after the 0th iteration
    eval_batch_step: [0, 400]
    cal_metric_during_train: true
    # No value is configured for the next three keys; they load as null.
    pretrained_model: null
    checkpoints: null
    save_inference_dir: null
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    save_res_path: output/table_mv3
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict.txt
    character_type: en
    # Anchored so other sections can alias the same value (*max_text_length).
    max_text_length: &max_text_length 500
    # Anchor is defined here; no alias to it is visible in this file.
    box_format: &box_format 'xyxy'  # 'xywh', 'xyxy', 'xyxyxyxy'
    infer_mode: false
  # Optimizer: optimizer selection plus its learning-rate and regularizer
  # sub-sections.
  # NOTE(review): nesting was reconstructed from a flattened listing; `lr` and
  # `regularizer` are assumed to own the keys that follow them.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      learning_rate: 0.001
    regularizer:
      name: 'L2'
      factor: 0.0
  # Architecture: model definition (algorithm TableAttn) with one Backbone
  # and one Head sub-section.
  # NOTE(review): nesting was reconstructed from a flattened listing; keys are
  # assigned to Backbone/Head by their position in the original order.
  Architecture:
    model_type: table
    algorithm: TableAttn
    Backbone:
      name: MobileNetV3
      scale: 1.0
      model_name: small
      disable_se: true
    Head:
      name: TableAttentionHead
      hidden_size: 256
      # Alias of the value anchored at Global.max_text_length.
      max_text_length: *max_text_length
      # Anchored so the label-encoding transforms can alias it (*loc_reg_num).
      loc_reg_num: &loc_reg_num 4
  # Loss: loss selection and the weights given to its structure and location
  # terms.
  Loss:
    name: TableAttentionLoss
    structure_weight: 100.0
    loc_weight: 10000.0
  # PostProcess: names the decoder applied to model output.
  PostProcess:
    name: TableLabelDecode
  # Metric: evaluation metric and the indicator reported as the main score.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false  # expensive to compute; keep false for training
  # Train: training dataset, its transform list, and data-loader settings.
  # NOTE(review): nesting was reconstructed from a flattened listing; each
  # transform entry is assumed to own the keys listed directly after it.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/train/
      label_file_list:
        - train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl
      # Each entry is a single-key mapping: op name -> its parameters
      # (null when the op takes none).
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: false
            replace_empty_cell_token: false
            # Aliases of the values anchored at Architecture.Head.loc_reg_num
            # and Global.max_text_length.
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode: null
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Quoted: this is a string expression, not a YAML float.
            # NOTE(review): presumably evaluated by the op at load time.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Same side length as ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
    loader:
      shuffle: true
      batch_size_per_card: 48
      drop_last: true
      num_workers: 1
  # Eval: evaluation dataset, its transform list, and data-loader settings.
  # The transform list matches the Train section; only the data paths and the
  # loader's shuffle/drop_last values differ.
  # NOTE(review): nesting was reconstructed from a flattened listing; each
  # transform entry is assumed to own the keys listed directly after it.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/val/
      label_file_list:
        - train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl
      # Each entry is a single-key mapping: op name -> its parameters
      # (null when the op takes none).
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: false
            replace_empty_cell_token: false
            # Aliases of the values anchored at Architecture.Head.loc_reg_num
            # and Global.max_text_length.
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode: null
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Quoted: this is a string expression, not a YAML float.
            # NOTE(review): presumably evaluated by the op at load time.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Same side length as ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
    loader:
      shuffle: false
      batch_size_per_card: 48
      drop_last: false
      num_workers: 1