You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

config.json 1.3 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. {
  2. "dataset_config": {
  3. "epochs": 20,
  4. "batch_size": 192,
  5. "pre_train_dataset": "",
  6. "fine_tune_dataset": "",
  7. "test_dataset": "",
  8. "valid_dataset": "",
  9. "dataset_sink_mode": false,
  10. "dataset_sink_step": 100
  11. },
  12. "model_config": {
  13. "random_seed": 100,
  14. "save_graphs": false,
  15. "seq_length": 64,
  16. "vocab_size": 45744,
  17. "hidden_size": 1024,
  18. "num_hidden_layers": 6,
  19. "num_attention_heads": 8,
  20. "intermediate_size": 4096,
  21. "hidden_act": "relu",
  22. "hidden_dropout_prob": 0.2,
  23. "attention_dropout_prob": 0.2,
  24. "max_position_embeddings": 64,
  25. "initializer_range": 0.02,
  26. "label_smoothing": 0.1,
  27. "beam_width": 4,
  28. "length_penalty_weight": 1.0,
  29. "max_decode_length": 64,
  30. "input_mask_from_dataset": true
  31. },
  32. "loss_scale_config": {
  33. "loss_scale_mode": "dynamic",
  34. "init_loss_scale": 65536,
  35. "loss_scale_factor": 2,
  36. "scale_window": 200
  37. },
  38. "learn_rate_config": {
  39. "optimizer": "adam",
  40. "lr": 1e-4,
  41. "lr_scheduler": "poly",
  42. "poly_lr_scheduler_power": 0.5,
  43. "decay_steps": 10000,
  44. "decay_start_step": 12000,
  45. "warmup_steps": 4000,
  46. "min_lr": 1e-6
  47. },
  48. "checkpoint_options": {
  49. "existed_ckpt": "",
  50. "save_ckpt_steps": 2500,
  51. "keep_ckpt_max": 50,
  52. "ckpt_prefix": "ckpt",
  53. "ckpt_path": "checkpoints"
  54. }
  55. }