You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

pretrain.json 1.6 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. {
  2. "dataset_config": {
  3. "epochs": 2,
  4. "batch_size": 1,
  5. "pre_train_dataset": "../news_crawl/dataset/tf_small_pretrain",
  6. "fine_tune_dataset": "",
  7. "test_dataset": "",
  8. "valid_dataset": "",
  9. "dataset_sink_mode": false,
  10. "dataset_sink_step": 100
  11. },
  12. "model_config": {
  13. "random_seed": 100,
  14. "save_graphs": false,
  15. "seq_length": 128,
  16. "vocab_size": 44000,
  17. "hidden_size": 768,
  18. "num_hidden_layers": 3,
  19. "ngram": 2,
  20. "disable_ngram_loss": false,
  21. "num_attention_heads": 12,
  22. "intermediate_size": 3072,
  23. "hidden_act": "relu",
  24. "hidden_dropout_prob": 0.1,
  25. "attention_dropout_prob": 0.1,
  26. "max_position_embeddings": 64,
  27. "initializer_range": 0.02,
  28. "label_smoothing": 0.1,
  29. "beam_width": 4,
  30. "length_penalty_weight": 1.0,
  31. "max_decode_length": 64,
  32. "input_mask_from_dataset": true
  33. },
  34. "loss_scale_config": {
  35. "loss_scale_mode": "static",
  36. "init_loss_scale": 32,
  37. "loss_scale_factor": 2,
  38. "scale_window": 200
  39. },
  40. "learn_rate_config": {
  41. "optimizer": "adam",
  42. "lr": 1e-4,
  43. "lr_scheduler": "poly",
  44. "poly_lr_scheduler_power": 0.5,
  45. "decay_steps": 10000,
  46. "decay_start_step": 12000,
  47. "warmup_steps": 4000,
  48. "min_lr": 1e-6
  49. },
  50. "checkpoint_options": {
  51. "existed_ckpt": "/home/yanglinfeng/ProphetNet/training_result/checkpoints/ckpt_1_0.ckpt",
  52. "save_ckpt_steps": 10,
  53. "keep_ckpt_max": 50,
  54. "ckpt_prefix": "ckpt",
  55. "ckpt_path": "checkpoints"
  56. }
  57. }