{
  "dataset_config": {
    "epochs": 2,
    "batch_size": 1,
    "pre_train_dataset": "../news_crawl/dataset/tf_small_pretrain",
    "fine_tune_dataset": "",
    "test_dataset": "",
    "valid_dataset": "",
    "dataset_sink_mode": false,
    "dataset_sink_step": 100
  },
  "model_config": {
    "random_seed": 100,
    "save_graphs": false,
    "seq_length": 128,
    "vocab_size": 44000,
    "hidden_size": 768,
    "num_hidden_layers": 3,
    "ngram": 2,
    "disable_ngram_loss": false,
    "num_attention_heads": 12,
    "intermediate_size": 3072,
    "hidden_act": "relu",
    "hidden_dropout_prob": 0.1,
    "attention_dropout_prob": 0.1,
    "max_position_embeddings": 64,
    "initializer_range": 0.02,
    "label_smoothing": 0.1,
    "beam_width": 4,
    "length_penalty_weight": 1.0,
    "max_decode_length": 64,
    "input_mask_from_dataset": true
  },
  "loss_scale_config": {
    "loss_scale_mode": "static",
    "init_loss_scale": 32,
    "loss_scale_factor": 2,
    "scale_window": 200
  },
  "learn_rate_config": {
    "optimizer": "adam",
    "lr": 1e-4,
    "lr_scheduler": "poly",
    "poly_lr_scheduler_power": 0.5,
    "decay_steps": 10000,
    "decay_start_step": 12000,
    "warmup_steps": 4000,
    "min_lr": 1e-6
  },
  "checkpoint_options": {
    "existed_ckpt": "/home/yanglinfeng/ProphetNet/training_result/checkpoints/ckpt_1_0.ckpt",
    "save_ckpt_steps": 10,
    "keep_ckpt_max": 50,
    "ckpt_prefix": "ckpt",
    "ckpt_path": "checkpoints"
  }
}