You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

finetune_config.py 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. config settings, will be used in finetune.py
  17. """
  18. from easydict import EasyDict as edict
  19. import mindspore.common.dtype as mstype
  20. from mindspore.model_zoo.Bert_NEZHA import BertConfig
  21. cfg = edict({
  22. 'task': 'NER',
  23. 'num_labels': 41,
  24. 'data_file': '/your/path/train.tfrecord',
  25. 'schema_file': '/your/path/schema.json',
  26. 'epoch_num': 5,
  27. 'ckpt_prefix': 'bert',
  28. 'ckpt_dir': None,
  29. 'pre_training_ckpt': '/your/path/pre_training.ckpt',
  30. 'use_crf': False,
  31. 'optimizer': 'Lamb',
  32. 'AdamWeightDecayDynamicLR': edict({
  33. 'learning_rate': 2e-5,
  34. 'end_learning_rate': 1e-7,
  35. 'power': 1.0,
  36. 'weight_decay': 1e-5,
  37. 'eps': 1e-6,
  38. }),
  39. 'Lamb': edict({
  40. 'start_learning_rate': 2e-5,
  41. 'end_learning_rate': 1e-7,
  42. 'power': 1.0,
  43. 'decay_filter': lambda x: False,
  44. }),
  45. 'Momentum': edict({
  46. 'learning_rate': 2e-5,
  47. 'momentum': 0.9,
  48. }),
  49. })
  50. bert_net_cfg = BertConfig(
  51. batch_size=16,
  52. seq_length=128,
  53. vocab_size=21128,
  54. hidden_size=768,
  55. num_hidden_layers=12,
  56. num_attention_heads=12,
  57. intermediate_size=3072,
  58. hidden_act="gelu",
  59. hidden_dropout_prob=0.1,
  60. attention_probs_dropout_prob=0.1,
  61. max_position_embeddings=512,
  62. type_vocab_size=2,
  63. initializer_range=0.02,
  64. use_relative_positions=False,
  65. input_mask_from_dataset=True,
  66. token_type_ids_from_dataset=True,
  67. dtype=mstype.float32,
  68. compute_type=mstype.float16,
  69. )
  70. tag_to_index = {
  71. "O": 0,
  72. "S_address": 1,
  73. "B_address": 2,
  74. "M_address": 3,
  75. "E_address": 4,
  76. "S_book": 5,
  77. "B_book": 6,
  78. "M_book": 7,
  79. "E_book": 8,
  80. "S_company": 9,
  81. "B_company": 10,
  82. "M_company": 11,
  83. "E_company": 12,
  84. "S_game": 13,
  85. "B_game": 14,
  86. "M_game": 15,
  87. "E_game": 16,
  88. "S_government": 17,
  89. "B_government": 18,
  90. "M_government": 19,
  91. "E_government": 20,
  92. "S_movie": 21,
  93. "B_movie": 22,
  94. "M_movie": 23,
  95. "E_movie": 24,
  96. "S_name": 25,
  97. "B_name": 26,
  98. "M_name": 27,
  99. "E_name": 28,
  100. "S_organization": 29,
  101. "B_organization": 30,
  102. "M_organization": 31,
  103. "E_organization": 32,
  104. "S_position": 33,
  105. "B_position": 34,
  106. "M_position": 35,
  107. "E_position": 36,
  108. "S_scene": 37,
  109. "B_scene": 38,
  110. "M_scene": 39,
  111. "E_scene": 40,
  112. "<START>": 41,
  113. "<STOP>": 42
  114. }