# Training / evaluation configuration for the TableMaster table model
# (Architecture.algorithm: TableMaster, Architecture.model_type: table).
#
# NOTE(review): this file was recovered from a flattened copy in which every
# line carried a stray "- " prefix and all indentation had been lost. Keys,
# values, anchors and aliases are unchanged; the nesting below was
# reconstructed and should be confirmed against the upstream config.

Global:
  use_gpu: true
  epoch_num: 17
  log_smooth_window: 20
  print_batch_step: 100
  save_model_dir: ./output/table_master/
  save_epoch_step: 17
  eval_batch_step: [0, 6259]
  cal_metric_during_train: true
  pretrained_model: null
  checkpoints:
  save_inference_dir: output/table_master/infer
  use_visualdl: false
  infer_img: ppstructure/docs/table/table.jpg
  save_res_path: ./output/table_master
  character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
  infer_mode: false
  # Anchored so Architecture.Head and the TableMasterLabelEncode transforms
  # below reuse the same value through *max_text_length.
  max_text_length: &max_text_length 500
  # Anchored so Metric and the TableBoxEncode transforms reuse the same format.
  box_format: &box_format 'xywh' # 'xywh', 'xyxy', 'xyxyxyxy'

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: MultiStepDecay
    learning_rate: 0.001
    milestones: [12, 15]
    gamma: 0.1
    # NOTE(review): placed under lr during reconstruction - confirm.
    warmup_epoch: 0.02
  regularizer:
    name: L2
    factor: 0.0

Architecture:
  model_type: table
  algorithm: TableMaster
  Backbone:
    name: TableResNetExtra
    gcb_config:
      ratio: 0.0625
      headers: 1
      att_scale: False
      fusion_type: channel_add
      # Boolean list belongs to gcb_config; the integer `layers` list below
      # is a separate Backbone-level key (same name, different level).
      layers: [False, True, True, True]
    layers: [1,2,5,3]
  Head:
    name: TableMasterHead
    hidden_size: 512
    headers: 8
    dropout: 0
    d_ff: 2024
    max_text_length: *max_text_length
    # Anchored so the TableMasterLabelEncode transforms reuse the same value.
    loc_reg_num: &loc_reg_num 4

Loss:
  name: TableMasterLoss
  ignore_index: 42 # set to len of dict + 3

PostProcess:
  name: TableMasterLabelDecode
  box_shape: pad
  # Anchored so the TableMasterLabelEncode transforms reuse the same flag.
  merge_no_span_structure: &merge_no_span_structure True

Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: False
  box_format: *box_format

Train:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/train/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
    # Transform list, in the order given in the file.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - TableMasterLabelEncode:
          learn_empty_box: False
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: True
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: True
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - NormalizeImage:
          scale: 1./255.
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: [image, structure, bboxes, bbox_masks, shape]
  loader:
    shuffle: True
    batch_size_per_card: 10
    drop_last: True
    num_workers: 8

Eval:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/val/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
    # Same transform list as Train.dataset.transforms.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - TableMasterLabelEncode:
          learn_empty_box: False
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: True
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: True
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          in_box_format: *box_format
          out_box_format: *box_format
      - NormalizeImage:
          scale: 1./255.
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: [image, structure, bboxes, bbox_masks, shape]
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 10
    num_workers: 8