|
|
|
@@ -120,7 +120,6 @@ class FeatureTokenizer: |
|
|
|
Whether to disable tokenizer parallelism, by default True. |
|
|
|
""" |
|
|
|
cache_dir = conf.cache_path |
|
|
|
os.makedirs(cache_dir, exist_ok=True) |
|
|
|
self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased", cache_dir=cache_dir) |
|
|
|
self.tokenizer.__dict__["model_max_length"] = 512 |
|
|
|
if disable_tokenizer_parallel: # disable tokenizer parallel |
|
|
|
|