You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test.json 1.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
{
  "dataset_config": {
    "epochs": 2,
    "batch_size": 1,
    "pre_train_dataset": "",
    "fine_tune_dataset": "",
    "test_dataset": "../cnndm_data_prophetnet/dataset_hugging_face_tokenized",
    "valid_dataset": "",
    "dataset_sink_mode": false,
    "dataset_sink_step": 100
  },
  "model_config": {
    "random_seed": 100,
    "save_graphs": false,
    "seq_length": 512,
    "vocab_size": 30522,
    "hidden_size": 512,
    "num_hidden_layers": 3,
    "ngram": 2,
    "disable_ngram_loss": false,
    "num_attention_heads": 8,
    "intermediate_size": 2048,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "attention_dropout_prob": 0.1,
    "max_position_embeddings": 512,
    "initializer_range": 0.02,
    "label_smoothing": 0.1,
    "beam_width": 5,
    "length_penalty_weight": 1.2,
    "max_decode_length": 110,
    "input_mask_from_dataset": true
  },
  "loss_scale_config": {
    "loss_scale_mode": "static",
    "init_loss_scale": 32,
    "loss_scale_factor": 2,
    "scale_window": 200
  },
  "learn_rate_config": {
    "optimizer": "adam",
    "lr": 1e-4,
    "lr_scheduler": "poly",
    "poly_lr_scheduler_power": 0.5,
    "decay_steps": 10000,
    "decay_start_step": 12000,
    "warmup_steps": 4000,
    "min_lr": 1e-6
  },
  "checkpoint_options": {
    "existed_ckpt": "../training_weight/ckpt-1_20000.ckpt",
    "save_ckpt_steps": 500,
    "keep_ckpt_max": 50,
    "ckpt_prefix": "ckpt",
    "ckpt_path": "checkpoints"
  }
}