From c4b6a23bc96d12152774f616de1c4177f7a84116 Mon Sep 17 00:00:00 2001 From: "yingda.chen" Date: Mon, 20 Jun 2022 10:54:00 +0800 Subject: [PATCH 1/6] [to #42322933] unify naming for model and pipeline files Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9083378 --- modelscope/models/nlp/__init__.py | 8 ++++---- ...ation_model.py => bert_for_sequence_classification.py} | 0 ...xt_generation_model.py => palm_for_text_generation.py} | 2 +- ...milarity_model.py => sbert_for_sentence_similarity.py} | 0 ...ication_model.py => sbert_for_token_classification.py} | 1 - modelscope/pipelines/multi_modal/__init__.py | 2 +- .../{image_captioning.py => image_caption_pipeline.py} | 0 modelscope/pipelines/nlp/sentence_similarity_pipeline.py | 3 --- .../pipelines/nlp/sequence_classification_pipeline.py | 3 --- modelscope/pipelines/nlp/word_segmentation_pipeline.py | 2 -- 10 files changed, 6 insertions(+), 15 deletions(-) rename modelscope/models/nlp/{sequence_classification_model.py => bert_for_sequence_classification.py} (100%) rename modelscope/models/nlp/{text_generation_model.py => palm_for_text_generation.py} (98%) rename modelscope/models/nlp/{sentence_similarity_model.py => sbert_for_sentence_similarity.py} (100%) rename modelscope/models/nlp/{token_classification_model.py => sbert_for_token_classification.py} (99%) rename modelscope/pipelines/multi_modal/{image_captioning.py => image_caption_pipeline.py} (100%) diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index aefcef4a..7129fcb8 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -1,4 +1,4 @@ -from .sentence_similarity_model import * # noqa F403 -from .sequence_classification_model import * # noqa F403 -from .text_generation_model import * # noqa F403 -from .token_classification_model import * # noqa F403 +from .bert_for_sequence_classification import * # noqa F403 +from .palm_for_text_generation import * # noqa F403 +from 
.sbert_for_sentence_similarity import * # noqa F403 +from .sbert_for_token_classification import * # noqa F403 diff --git a/modelscope/models/nlp/sequence_classification_model.py b/modelscope/models/nlp/bert_for_sequence_classification.py similarity index 100% rename from modelscope/models/nlp/sequence_classification_model.py rename to modelscope/models/nlp/bert_for_sequence_classification.py diff --git a/modelscope/models/nlp/text_generation_model.py b/modelscope/models/nlp/palm_for_text_generation.py similarity index 98% rename from modelscope/models/nlp/text_generation_model.py rename to modelscope/models/nlp/palm_for_text_generation.py index 8feac691..ffba7265 100644 --- a/modelscope/models/nlp/text_generation_model.py +++ b/modelscope/models/nlp/palm_for_text_generation.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Dict from modelscope.utils.constant import Tasks from ..base import Model, Tensor diff --git a/modelscope/models/nlp/sentence_similarity_model.py b/modelscope/models/nlp/sbert_for_sentence_similarity.py similarity index 100% rename from modelscope/models/nlp/sentence_similarity_model.py rename to modelscope/models/nlp/sbert_for_sentence_similarity.py diff --git a/modelscope/models/nlp/token_classification_model.py b/modelscope/models/nlp/sbert_for_token_classification.py similarity index 99% rename from modelscope/models/nlp/token_classification_model.py rename to modelscope/models/nlp/sbert_for_token_classification.py index 43d4aafb..b918dc37 100644 --- a/modelscope/models/nlp/token_classification_model.py +++ b/modelscope/models/nlp/sbert_for_token_classification.py @@ -1,4 +1,3 @@ -import os from typing import Any, Dict, Union import numpy as np diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py index 7d9a2c59..b1ee121c 100644 --- a/modelscope/pipelines/multi_modal/__init__.py +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -1 +1 @@ -from .image_captioning import 
ImageCaptionPipeline +from .image_caption_pipeline import ImageCaptionPipeline diff --git a/modelscope/pipelines/multi_modal/image_captioning.py b/modelscope/pipelines/multi_modal/image_caption_pipeline.py similarity index 100% rename from modelscope/pipelines/multi_modal/image_captioning.py rename to modelscope/pipelines/multi_modal/image_caption_pipeline.py diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py index 44d91756..1b630c10 100644 --- a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py +++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py @@ -1,8 +1,5 @@ -import os -import uuid from typing import Any, Dict, Union -import json import numpy as np from modelscope.models.nlp import SbertForSentenceSimilarity diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py index 9d2e4273..1dbe2efd 100644 --- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py +++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py @@ -1,8 +1,5 @@ -import os -import uuid from typing import Any, Dict, Union -import json import numpy as np from modelscope.models.nlp import BertForSequenceClassification diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py index 49aa112a..1cc08a38 100644 --- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py +++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py @@ -1,7 +1,5 @@ from typing import Any, Dict, Optional, Union -import numpy as np - from modelscope.models import Model from modelscope.models.nlp import StructBertForTokenClassification from modelscope.preprocessors import TokenClassifcationPreprocessor From 99fb50369544c244f1045bc880b6a04f300506bd Mon Sep 17 00:00:00 2001 From: "hemu.zp" Date: Mon, 20 Jun 2022 16:00:31 +0800 Subject: [PATCH 2/6] [to #42322933] Add Palm2.0 
model. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 接入支持中英文的 Palm2.0 模型,复用 text-generation-pipeline Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9066550 --- .../models/nlp/palm_for_text_generation.py | 25 +++---- modelscope/pipelines/builder.py | 3 +- .../pipelines/nlp/text_generation_pipeline.py | 34 +++++---- modelscope/preprocessors/nlp.py | 11 ++- requirements/nlp.txt | 2 +- requirements/runtime.txt | 2 +- tests/pipelines/test_text_generation.py | 72 ++++++++++++------- 7 files changed, 83 insertions(+), 66 deletions(-) diff --git a/modelscope/models/nlp/palm_for_text_generation.py b/modelscope/models/nlp/palm_for_text_generation.py index ffba7265..e5799feb 100644 --- a/modelscope/models/nlp/palm_for_text_generation.py +++ b/modelscope/models/nlp/palm_for_text_generation.py @@ -7,7 +7,7 @@ from ..builder import MODELS __all__ = ['PalmForTextGeneration'] -@MODELS.register_module(Tasks.text_generation, module_name=r'palm') +@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0') class PalmForTextGeneration(Model): def __init__(self, model_dir: str, *args, **kwargs): @@ -18,35 +18,26 @@ class PalmForTextGeneration(Model): model_cls (Optional[Any], optional): model loader, if None, use the default loader to load model weights, by default None. 
""" - from sofa import PalmTokenizer - super().__init__(model_dir, *args, **kwargs) self.model_dir = model_dir - from sofa.models.palm import PalmForConditionalGeneration, TextGenerator - tokenizer = kwargs.pop('tokenizer', - PalmTokenizer.from_pretrained(model_dir)) + from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator model = PalmForConditionalGeneration.from_pretrained(model_dir) - self.generator = TextGenerator(model, tokenizer) + self.tokenizer = model.tokenizer + self.generator = Translator(model) def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: """return the result by the model Args: - input (Dict[str, Any]): the preprocessed data + input (Dict[str, Tensor]): the preprocessed data Returns: - Dict[str, np.ndarray]: results + Dict[str, Tensor]: results Example: { - 'predictions': array([1]), # lable 0-negative 1-positive - 'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32), - 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value + 'predictions': Tensor([[1377, 4959, 2785, 6392...])]), # tokens need to be decode by tokenizer } """ - encoder_inputs = [ - input['input_ids'], input['token_type_ids'], - input['attention_mask'] - ] - return self.generator(encoder_inputs) + return self.generator(**input) diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index c24a7c3e..6e2c791d 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -22,7 +22,8 @@ DEFAULT_MODEL_FOR_PIPELINE = { Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'), Tasks.text_classification: ('bert-sentiment-analysis', 'damo/bert-base-sst2'), - Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'), + Tasks.text_generation: ('palm2.0', + 'damo/nlp_palm2.0_text-generation_chinese-base'), Tasks.image_captioning: ('ofa', None), Tasks.image_generation: ('person-image-cartoon', diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py 
b/modelscope/pipelines/nlp/text_generation_pipeline.py index 8b6bf8a9..881e7ea6 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -10,7 +10,7 @@ from ..builder import PIPELINES __all__ = ['TextGenerationPipeline'] -@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm') +@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0') class TextGenerationPipeline(Pipeline): def __init__(self, @@ -23,15 +23,16 @@ class TextGenerationPipeline(Pipeline): model (SequenceClassificationModel): a model instance preprocessor (SequenceClassificationPreprocessor): a preprocessor instance """ - sc_model = model if isinstance( + model = model if isinstance( model, PalmForTextGeneration) else Model.from_pretrained(model) if preprocessor is None: preprocessor = TextGenerationPreprocessor( - sc_model.model_dir, + model.model_dir, + model.tokenizer, first_sequence='sentence', second_sequence=None) - super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) - self.tokenizer = preprocessor.tokenizer + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + self.tokenizer = model.tokenizer def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]: """process the prediction results @@ -42,17 +43,20 @@ class TextGenerationPipeline(Pipeline): Returns: Dict[str, str]: the prediction results """ + replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''), + ('[unused1]', ''), (r' +', ' '), ('[SEP]', ''), + ('[unused2]', ''), ('[CLS]', ''), ('[UNK]', '')) + replace_tokens_roberta = ((r' +', ' '), ('<mask>', '<q>'), ('<pad>', + ''), + ('<s>', ''), ('</s>', ''), ('<unk>', ' ')) - vocab_size = len(self.tokenizer.vocab) pred_list = inputs['predictions'] pred_ids = pred_list[0][0].cpu().numpy().tolist() - for j in range(len(pred_ids)): - if pred_ids[j] >= vocab_size: - pred_ids[j] = 100 - pred = self.tokenizer.convert_ids_to_tokens(pred_ids) - pred_string = ''.join(pred).replace( - '##',
- '').split('[SEP]')[0].replace('[CLS]', - '').replace('[SEP]', - '').replace('[UNK]', '') + pred_string = self.tokenizer.decode(pred_ids) + for _old, _new in replace_tokens_bert: + pred_string = pred_string.replace(_old, _new) + pred_string.strip() + for _old, _new in replace_tokens_roberta: + pred_string = pred_string.replace(_old, _new) + pred_string.strip() return {'text': pred_string} diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 6a4a25fc..9bcaa87c 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -115,17 +115,15 @@ class SequenceClassificationPreprocessor(Preprocessor): return rst -@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm') +@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0') class TextGenerationPreprocessor(Preprocessor): - def __init__(self, model_dir: str, *args, **kwargs): + def __init__(self, model_dir: str, tokenizer, *args, **kwargs): """preprocess the data using the vocab.txt from the `model_dir` path Args: model_dir (str): model path """ - from sofa import PalmTokenizer - super().__init__(*args, **kwargs) self.model_dir: str = model_dir @@ -134,7 +132,7 @@ class TextGenerationPreprocessor(Preprocessor): self.second_sequence: str = kwargs.pop('second_sequence', 'second_sequence') self.sequence_length: int = kwargs.pop('sequence_length', 128) - self.tokenizer = PalmTokenizer.from_pretrained(model_dir) + self.tokenizer = tokenizer @type_assert(object, str) def __call__(self, data: str) -> Dict[str, Any]: @@ -153,7 +151,7 @@ class TextGenerationPreprocessor(Preprocessor): new_data = {self.first_sequence: data} # preprocess the data for the model input - rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []} + rst = {'input_ids': [], 'attention_mask': []} max_seq_length = self.sequence_length @@ -168,7 +166,6 @@ class TextGenerationPreprocessor(Preprocessor): rst['input_ids'].append(feature['input_ids']) 
rst['attention_mask'].append(feature['attention_mask']) - rst['token_type_ids'].append(feature['token_type_ids']) return {k: torch.tensor(v) for k, v in rst.items()} diff --git a/requirements/nlp.txt b/requirements/nlp.txt index 8de83798..4e146a81 100644 --- a/requirements/nlp.txt +++ b/requirements/nlp.txt @@ -1 +1 @@ -https://alinlp.alibaba-inc.com/pypi/sofa-1.0.1.3-py3-none-any.whl +https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl diff --git a/requirements/runtime.txt b/requirements/runtime.txt index dd5616a2..e97352aa 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,7 +1,7 @@ addict datasets easydict -https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.2.dev0-py3-none-any.whl +https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl numpy opencv-python-headless Pillow>=6.2.0 diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index 39d57ff7..fbdd165f 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -12,43 +12,67 @@ from modelscope.utils.test_utils import test_level class TextGenerationTest(unittest.TestCase): - model_id = 'damo/nlp_palm_text-generation_chinese' - input1 = "今日天气类型='晴'&温度变化趋势='大幅上升'&最低气温='28℃'&最高气温='31℃'&体感='湿热'" - input2 = "今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'" + model_id_zh = 'damo/nlp_palm2.0_text-generation_chinese-base' + model_id_en = 'damo/nlp_palm2.0_text-generation_english-base' + input_zh = """ + 本文总结了十个可穿戴产品的设计原则,而这些原则,同样也是笔者认为是这个行业最吸引人的地方: + 1.为人们解决重复性问题;2.从人开始,而不是从机器开始;3.要引起注意,但不要刻意;4.提升用户能力,而不是取代 + """ + input_en = """ + The Director of Public Prosecutions who let off Lord Janner over alleged child sex abuse started + her career at a legal chambers when the disgraced Labour peer was a top QC there . 
Alison Saunders , + 54 , sparked outrage last week when she decided the 86-year-old should not face astring of charges + of paedophilia against nine children because he has dementia . Today , newly-released documents + revealed damning evidence that abuse was covered up by police andsocial workers for more than 20 years . + And now it has emerged Mrs Saunders ' law career got off to a flying start when she secured her + pupillage -- a barrister 's training contract at 1 Garden Court Chambers in London in 1983 . + """ @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run(self): - cache_path = snapshot_download(self.model_id) - preprocessor = TextGenerationPreprocessor( - cache_path, first_sequence='sentence', second_sequence=None) - model = PalmForTextGeneration( - cache_path, tokenizer=preprocessor.tokenizer) - pipeline1 = TextGenerationPipeline(model, preprocessor) - pipeline2 = pipeline( - Tasks.text_generation, model=model, preprocessor=preprocessor) - print(f'input: {self.input1}\npipeline1: {pipeline1(self.input1)}') - print() - print(f'input: {self.input2}\npipeline2: {pipeline2(self.input2)}') + for model_id, input in ((self.model_id_zh, self.input_zh), + (self.model_id_en, self.input_en)): + cache_path = snapshot_download(model_id) + model = PalmForTextGeneration(cache_path) + preprocessor = TextGenerationPreprocessor( + cache_path, + model.tokenizer, + first_sequence='sentence', + second_sequence=None) + pipeline1 = TextGenerationPipeline(model, preprocessor) + pipeline2 = pipeline( + Tasks.text_generation, model=model, preprocessor=preprocessor) + print( + f'pipeline1: {pipeline1(input)}\npipeline2: {pipeline2(input)}' + ) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_from_modelhub(self): - model = Model.from_pretrained(self.model_id) - preprocessor = TextGenerationPreprocessor( - model.model_dir, first_sequence='sentence', second_sequence=None) - pipeline_ins = 
pipeline( - task=Tasks.text_generation, model=model, preprocessor=preprocessor) - print(pipeline_ins(self.input1)) + for model_id, input in ((self.model_id_zh, self.input_zh), + (self.model_id_en, self.input_en)): + model = Model.from_pretrained(model_id) + preprocessor = TextGenerationPreprocessor( + model.model_dir, + model.tokenizer, + first_sequence='sentence', + second_sequence=None) + pipeline_ins = pipeline( + task=Tasks.text_generation, + model=model, + preprocessor=preprocessor) + print(pipeline_ins(input)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline( - task=Tasks.text_generation, model=self.model_id) - print(pipeline_ins(self.input2)) + for model_id, input in ((self.model_id_zh, self.input_zh), + (self.model_id_en, self.input_en)): + pipeline_ins = pipeline(task=Tasks.text_generation, model=model_id) + print(pipeline_ins(input)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.text_generation) - print(pipeline_ins(self.input2)) + print(pipeline_ins(self.input_zh)) if __name__ == '__main__': From c99f3a9b8c0ede1578ebf0e32826a622f1c488ee Mon Sep 17 00:00:00 2001 From: ly119399 Date: Mon, 20 Jun 2022 16:03:50 +0800 Subject: [PATCH 3/6] dialog modeling ready --- modelscope/utils/constant.py | 2 +- tests/pipelines/nlp/test_dialog_modeling.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 7fbbb190..20ef117b 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -38,7 +38,7 @@ class Tasks(object): token_classification = 'token-classification' conversational = 'conversational' text_generation = 'text-generation' - dialog_modeling = 'dialog_modeling' + dialog_modeling = 'dialog-modeling' dialog_intent_prediction = 'dialog-intent-prediction' 
table_question_answering = 'table-question-answering' feature_extraction = 'feature-extraction' diff --git a/tests/pipelines/nlp/test_dialog_modeling.py b/tests/pipelines/nlp/test_dialog_modeling.py index 855bdff4..7d4da8fe 100644 --- a/tests/pipelines/nlp/test_dialog_modeling.py +++ b/tests/pipelines/nlp/test_dialog_modeling.py @@ -92,10 +92,9 @@ class DialogModelingTest(unittest.TestCase): } } - # @unittest.skip('test with snapshot_download') + @unittest.skip('test with snapshot_download') def test_run(self): - # cache_path = '/Users/yangliu/Space/maas_model/nlp_space_dialog-modeling' cache_path = snapshot_download(self.model_id) preprocessor = DialogModelingPreprocessor(model_dir=cache_path) @@ -124,12 +123,12 @@ class DialogModelingTest(unittest.TestCase): def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) - preprocessor = DialogGenerationPreprocessor(model_dir=model.model_dir) + preprocessor = DialogModelingPreprocessor(model_dir=model.model_dir) pipelines = [ - DialogGenerationPipeline(model=model, preprocessor=preprocessor), + DialogModelingPipeline(model=model, preprocessor=preprocessor), pipeline( - task=Tasks.dialog_generation, + task=Tasks.dialog_modeling, model=model, preprocessor=preprocessor) ] From 6f8910dbcb5068428981a5aa5e32202b5cfdf293 Mon Sep 17 00:00:00 2001 From: ly119399 Date: Mon, 20 Jun 2022 16:49:32 +0800 Subject: [PATCH 4/6] bug fix --- modelscope/utils/nlp/space/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelscope/utils/nlp/space/utils.py b/modelscope/utils/nlp/space/utils.py index 822305fd..ba956b7d 100644 --- a/modelscope/utils/nlp/space/utils.py +++ b/modelscope/utils/nlp/space/utils.py @@ -169,8 +169,8 @@ class MultiWOZVocab(object): if include_oov: if self._word2idx.get(word, None) is None: raise ValueError( - 'Unknown word: %s. Vocabulary should include oovs here.' % - word) + 'Unknown word: %s. Vocabulary should include oovs here.' 
+ % word) return self._word2idx[word] else: word = '<unk>' if word not in self._word2idx else word From b812cb78c9d87037769e2eb9ba59c2ee986a71da Mon Sep 17 00:00:00 2001 From: ly119399 Date: Mon, 20 Jun 2022 17:10:54 +0800 Subject: [PATCH 5/6] add dep --- requirements/nlp.txt | 3 +++ requirements/nlp/space.txt | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 requirements/nlp/space.txt diff --git a/requirements/nlp.txt b/requirements/nlp.txt index 4e146a81..4ec6fe04 100644 --- a/requirements/nlp.txt +++ b/requirements/nlp.txt @@ -1 +1,4 @@ +en_core_web_sm>=2.3.1 https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl +spacy>=2.3.5 +# python -m spacy download en_core_web_sm diff --git a/requirements/nlp/space.txt b/requirements/nlp/space.txt deleted file mode 100644 index 09a0f64e..00000000 --- a/requirements/nlp/space.txt +++ /dev/null @@ -1,2 +0,0 @@ -spacy==2.3.5 -# python -m spacy download en_core_web_sm From c6cf0d20c5c6ad729f88a30ca639df3d484c1e34 Mon Sep 17 00:00:00 2001 From: ly119399 Date: Mon, 20 Jun 2022 17:35:53 +0800 Subject: [PATCH 6/6] add dep --- requirements/nlp.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/nlp.txt b/requirements/nlp.txt index 4ec6fe04..eefb3c7d 100644 --- a/requirements/nlp.txt +++ b/requirements/nlp.txt @@ -1,4 +1,4 @@ -en_core_web_sm>=2.3.1 https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz spacy>=2.3.5 # python -m spacy download en_core_web_sm