|
|
@@ -103,7 +103,7 @@ if cfg.bert_network == 'large': |
|
|
num_hidden_layers=24, |
|
|
num_hidden_layers=24, |
|
|
num_attention_heads=16, |
|
|
num_attention_heads=16, |
|
|
intermediate_size=4096, |
|
|
intermediate_size=4096, |
|
|
hidden_act="fast_gelu", |
|
|
|
|
|
|
|
|
hidden_act="gelu", |
|
|
hidden_dropout_prob=0.1, |
|
|
hidden_dropout_prob=0.1, |
|
|
attention_probs_dropout_prob=0.1, |
|
|
attention_probs_dropout_prob=0.1, |
|
|
max_position_embeddings=512, |
|
|
max_position_embeddings=512, |
|
|
|