From 83b0adf0a2391a8459b28685d843970fcdbcb310 Mon Sep 17 00:00:00 2001
From: pangda <pangda@alibaba-inc.com>
Date: Thu, 25 Aug 2022 23:04:14 +0800
Subject: [PATCH] [to #42322933] fix bug for multi-lang text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

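支持多语言tokenize（830模型）

Support multi-language tokenization (830 model): load the NERPreprocessor
tokenizer via AutoTokenizer.from_pretrained with use_fast=True instead of
use_fast=False.

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9900916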
---
 modelscope/preprocessors/nlp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 25576667..222a219a 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -533,7 +533,7 @@ class NERPreprocessor(Preprocessor):
         self.model_dir: str = model_dir
         self.sequence_length = kwargs.pop('sequence_length', 512)
         self.tokenizer = AutoTokenizer.from_pretrained(
-            model_dir, use_fast=False)
+            model_dir, use_fast=True)
         self.is_split_into_words = self.tokenizer.init_kwargs.get(
             'is_split_into_words', False)