Browse Source

[to #42322933] fix punkt file

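删除模型仓库中多余的文件,只保留punkt.zip文件,运行时解压 (Remove the redundant files from the model repository, keeping only punkt.zip, which is unzipped at runtime.)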
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9800149
master
piaoyu.lxy yingda.chen 3 years ago
parent
commit
87290ed6f0
1 changed file with 9 additions and 0 deletions
  1. +9
    -0
      modelscope/preprocessors/star/fields/common_utils.py

+ 9
- 0
modelscope/preprocessors/star/fields/common_utils.py View File

@@ -193,6 +193,15 @@ class SubPreprocessor():

from nltk import data
data.path.append(os.path.join(self.model_dir, 'nltk_data'))

zippath = os.path.join(self.model_dir, 'nltk_data/tokenizers/punkt')
if os.path.exists(zippath):
print('punkt has already exist!')
else:
import zipfile
with zipfile.ZipFile(zippath + '.zip') as zf:
zf.extractall(
os.path.join(self.model_dir, 'nltk_data/tokenizers/'))
question = nltk.word_tokenize(question)
question = mwtokenizer.tokenize(question)



Loading…
Cancel
Save