{
  "dataset_config": {
    "epochs": 2,
    "batch_size": 1,
    "pre_train_dataset": "",
    "fine_tune_dataset": "",
    "test_dataset": "../cnndm_data_prophetnet/dataset_hugging_face_tokenized",
    "valid_dataset": "",
    "dataset_sink_mode": false,
    "dataset_sink_step": 100
  },
  "model_config": {
    "random_seed": 100,
    "save_graphs": false,
    "seq_length": 512,
    "vocab_size": 30522,
    "hidden_size": 512,
    "num_hidden_layers": 3,
    "ngram": 2,
    "disable_ngram_loss": false,
    "num_attention_heads": 8,
    "intermediate_size": 2048,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "attention_dropout_prob": 0.1,
    "max_position_embeddings": 512,
    "initializer_range": 0.02,
    "label_smoothing": 0.1,
    "beam_width": 5,
    "length_penalty_weight": 1.2,
    "max_decode_length": 110,
    "input_mask_from_dataset": true
  },
  "loss_scale_config": {
    "loss_scale_mode": "static",
    "init_loss_scale": 32,
    "loss_scale_factor": 2,
    "scale_window": 200
  },
  "learn_rate_config": {
    "optimizer": "adam",
    "lr": 1e-4,
    "lr_scheduler": "poly",
    "poly_lr_scheduler_power": 0.5,
    "decay_steps": 10000,
    "decay_start_step": 12000,
    "warmup_steps": 4000,
    "min_lr": 1e-6
  },
  "checkpoint_options": {
    "existed_ckpt": "../training_weight/ckpt-1_20000.ckpt",
    "save_ckpt_steps": 500,
    "keep_ckpt_max": 50,
    "ckpt_prefix": "ckpt",
    "ckpt_path": "checkpoints"
  }
}