You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

config.py 3.1 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ===========================================================================
  15. """
  16. network config setting, will be used in train.py and eval.py
  17. """
  18. from easydict import EasyDict as ed
  19. train_config = ed({
  20. "TrainingConfig": {
  21. "epochs": 70,
  22. },
  23. "DataConfig": {
  24. "train_manifest": 'data/libri_train_manifest.csv',
  25. # "val_manifest": 'data/libri_val_manifest.csv',
  26. "batch_size": 20,
  27. "labels_path": "labels.json",
  28. "SpectConfig": {
  29. "sample_rate": 16000,
  30. "window_size": 0.02,
  31. "window_stride": 0.01,
  32. "window": "hamming"
  33. },
  34. "AugmentationConfig": {
  35. "speed_volume_perturb": False,
  36. "spec_augment": False,
  37. "noise_dir": '',
  38. "noise_prob": 0.4,
  39. "noise_min": 0.0,
  40. "noise_max": 0.5,
  41. }
  42. },
  43. "ModelConfig": {
  44. "rnn_type": "LSTM",
  45. "hidden_size": 1024,
  46. "hidden_layers": 5,
  47. "lookahead_context": 20,
  48. },
  49. "OptimConfig": {
  50. "learning_rate": 3e-4,
  51. "learning_anneal": 1.1,
  52. "weight_decay": 1e-5,
  53. "momentum": 0.9,
  54. "eps": 1e-8,
  55. "betas": (0.9, 0.999),
  56. "loss_scale": 1024,
  57. "epsilon": 0.00001
  58. },
  59. "CheckpointConfig": {
  60. "ckpt_file_name_prefix": 'DeepSpeech',
  61. "ckpt_path": './checkpoint',
  62. "keep_checkpoint_max": 10
  63. }
  64. })
  65. eval_config = ed({
  66. "save_output": 'librispeech_val_output',
  67. "verbose": True,
  68. "DataConfig": {
  69. "test_manifest": 'data/libri_test_clean_manifest.csv',
  70. # "test_manifest": 'data/libri_test_other_manifest.csv',
  71. # "test_manifest": 'data/libri_val_manifest.csv',
  72. "batch_size": 20,
  73. "labels_path": "labels.json",
  74. "SpectConfig": {
  75. "sample_rate": 16000,
  76. "window_size": 0.02,
  77. "window_stride": 0.01,
  78. "window": "hanning"
  79. },
  80. },
  81. "ModelConfig": {
  82. "rnn_type": "LSTM",
  83. "hidden_size": 1024,
  84. "hidden_layers": 5,
  85. "lookahead_context": 20,
  86. },
  87. "LMConfig": {
  88. "decoder_type": "greedy",
  89. "lm_path": './3-gram.pruned.3e-7.arpa',
  90. "top_paths": 1,
  91. "alpha": 1.818182,
  92. "beta": 0,
  93. "cutoff_top_n": 40,
  94. "cutoff_prob": 1.0,
  95. "beam_width": 1024,
  96. "lm_workers": 4
  97. },
  98. })