You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

finetune_config.py 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. config settings, will be used in finetune.py
  17. """
  18. from easydict import EasyDict as edict
  19. import mindspore.common.dtype as mstype
  20. from .bert_model import BertConfig
# Fine-tuning hyperparameters consumed by finetune.py.
cfg = edict({
    'task': 'NER',                                        # downstream task name
    'num_labels': 41,                                     # output label count; matches tags 0-40 in tag_to_index (excluding <START>/<STOP>)
    'data_file': '/your/path/train.tfrecord',             # placeholder: training data in TFRecord format
    'schema_file': '/your/path/schema.json',              # placeholder: dataset schema file
    'epoch_num': 5,                                       # number of fine-tuning epochs
    'ckpt_prefix': 'bert',                                # filename prefix for saved checkpoints
    'ckpt_dir': None,                                     # checkpoint output dir; None presumably falls back to a default — confirm in finetune.py
    'pre_training_ckpt': '/your/path/pre_training.ckpt',  # placeholder: pretrained BERT checkpoint to load
    'use_crf': False,                                     # whether to stack a CRF layer on top; tag_to_index below then supplies <START>/<STOP>
    'optimizer': 'Lamb',                                  # selects one of the optimizer sub-configs below by key name
    # Hyperparameters for the AdamWeightDecayDynamicLR optimizer.
    'AdamWeightDecayDynamicLR': edict({
        'learning_rate': 2e-5,
        'end_learning_rate': 1e-7,
        'power': 1.0,                                     # polynomial-decay power
        'weight_decay': 1e-5,
        'eps': 1e-6,
    }),
    # Hyperparameters for the Lamb optimizer (the active choice per 'optimizer' above).
    'Lamb': edict({
        'start_learning_rate': 2e-5,
        'end_learning_rate': 1e-7,
        'power': 1.0,                                     # polynomial-decay power
        'weight_decay': 0.01,
        'decay_filter': lambda x: False,                  # filter admits no parameter; NOTE(review): confirm intended weight-decay scope against MindSpore Lamb semantics
    }),
    # Hyperparameters for the Momentum optimizer.
    'Momentum': edict({
        'learning_rate': 2e-5,
        'momentum': 0.9,
    }),
})
# BERT network architecture for fine-tuning. These dimensions must match the
# pretrained checkpoint referenced by cfg.pre_training_ckpt — TODO confirm.
bert_net_cfg = BertConfig(
    batch_size=16,
    seq_length=128,                      # maximum input sequence length
    vocab_size=21128,                    # consistent with the Chinese BERT-Base vocabulary — verify against the tokenizer
    hidden_size=768,
    num_hidden_layers=12,                # BERT-Base sized encoder: 12 layers, 12 heads, 768 hidden
    num_attention_heads=12,
    intermediate_size=3072,              # feed-forward inner dimension (4 * hidden_size)
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=2,                   # two segment (token-type) ids
    initializer_range=0.02,
    use_relative_positions=False,
    input_mask_from_dataset=True,        # input mask supplied by the dataset rather than derived
    token_type_ids_from_dataset=True,
    dtype=mstype.float32,                # parameter/storage dtype
    compute_type=mstype.float16,         # mixed precision: compute in fp16
)
  71. tag_to_index = {
  72. "O": 0,
  73. "S_address": 1,
  74. "B_address": 2,
  75. "M_address": 3,
  76. "E_address": 4,
  77. "S_book": 5,
  78. "B_book": 6,
  79. "M_book": 7,
  80. "E_book": 8,
  81. "S_company": 9,
  82. "B_company": 10,
  83. "M_company": 11,
  84. "E_company": 12,
  85. "S_game": 13,
  86. "B_game": 14,
  87. "M_game": 15,
  88. "E_game": 16,
  89. "S_government": 17,
  90. "B_government": 18,
  91. "M_government": 19,
  92. "E_government": 20,
  93. "S_movie": 21,
  94. "B_movie": 22,
  95. "M_movie": 23,
  96. "E_movie": 24,
  97. "S_name": 25,
  98. "B_name": 26,
  99. "M_name": 27,
  100. "E_name": 28,
  101. "S_organization": 29,
  102. "B_organization": 30,
  103. "M_organization": 31,
  104. "E_organization": 32,
  105. "S_position": 33,
  106. "B_position": 34,
  107. "M_position": 35,
  108. "E_position": 36,
  109. "S_scene": 37,
  110. "B_scene": 38,
  111. "M_scene": 39,
  112. "E_scene": 40,
  113. "<START>": 41,
  114. "<STOP>": 42
  115. }