From c4b6a23bc96d12152774f616de1c4177f7a84116 Mon Sep 17 00:00:00 2001
From: "yingda.chen" <yingda.chen@alibaba-inc.com>
Date: Mon, 20 Jun 2022 10:54:00 +0800
Subject: [PATCH 1/6] [to #42322933] unify naming for model and pipeline files 
        Link:
 https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9083378

---
 modelscope/models/nlp/__init__.py                         | 8 ++++----
 ...ation_model.py => bert_for_sequence_classification.py} | 0
 ...xt_generation_model.py => palm_for_text_generation.py} | 2 +-
 ...milarity_model.py => sbert_for_sentence_similarity.py} | 0
 ...ication_model.py => sbert_for_token_classification.py} | 1 -
 modelscope/pipelines/multi_modal/__init__.py              | 2 +-
 .../{image_captioning.py => image_caption_pipeline.py}    | 0
 modelscope/pipelines/nlp/sentence_similarity_pipeline.py  | 3 ---
 .../pipelines/nlp/sequence_classification_pipeline.py     | 3 ---
 modelscope/pipelines/nlp/word_segmentation_pipeline.py    | 2 --
 10 files changed, 6 insertions(+), 15 deletions(-)
 rename modelscope/models/nlp/{sequence_classification_model.py => bert_for_sequence_classification.py} (100%)
 rename modelscope/models/nlp/{text_generation_model.py => palm_for_text_generation.py} (98%)
 rename modelscope/models/nlp/{sentence_similarity_model.py => sbert_for_sentence_similarity.py} (100%)
 rename modelscope/models/nlp/{token_classification_model.py => sbert_for_token_classification.py} (99%)
 rename modelscope/pipelines/multi_modal/{image_captioning.py => image_caption_pipeline.py} (100%)

diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py
index aefcef4a..7129fcb8 100644
--- a/modelscope/models/nlp/__init__.py
+++ b/modelscope/models/nlp/__init__.py
@@ -1,4 +1,4 @@
-from .sentence_similarity_model import *  # noqa F403
-from .sequence_classification_model import *  # noqa F403
-from .text_generation_model import *  # noqa F403
-from .token_classification_model import *  # noqa F403
+from .bert_for_sequence_classification import *  # noqa F403
+from .palm_for_text_generation import *  # noqa F403
+from .sbert_for_sentence_similarity import *  # noqa F403
+from .sbert_for_token_classification import *  # noqa F403
diff --git a/modelscope/models/nlp/sequence_classification_model.py b/modelscope/models/nlp/bert_for_sequence_classification.py
similarity index 100%
rename from modelscope/models/nlp/sequence_classification_model.py
rename to modelscope/models/nlp/bert_for_sequence_classification.py
diff --git a/modelscope/models/nlp/text_generation_model.py b/modelscope/models/nlp/palm_for_text_generation.py
similarity index 98%
rename from modelscope/models/nlp/text_generation_model.py
rename to modelscope/models/nlp/palm_for_text_generation.py
index 8feac691..ffba7265 100644
--- a/modelscope/models/nlp/text_generation_model.py
+++ b/modelscope/models/nlp/palm_for_text_generation.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Dict
 
 from modelscope.utils.constant import Tasks
 from ..base import Model, Tensor
diff --git a/modelscope/models/nlp/sentence_similarity_model.py b/modelscope/models/nlp/sbert_for_sentence_similarity.py
similarity index 100%
rename from modelscope/models/nlp/sentence_similarity_model.py
rename to modelscope/models/nlp/sbert_for_sentence_similarity.py
diff --git a/modelscope/models/nlp/token_classification_model.py b/modelscope/models/nlp/sbert_for_token_classification.py
similarity index 99%
rename from modelscope/models/nlp/token_classification_model.py
rename to modelscope/models/nlp/sbert_for_token_classification.py
index 43d4aafb..b918dc37 100644
--- a/modelscope/models/nlp/token_classification_model.py
+++ b/modelscope/models/nlp/sbert_for_token_classification.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any, Dict, Union
 
 import numpy as np
diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py
index 7d9a2c59..b1ee121c 100644
--- a/modelscope/pipelines/multi_modal/__init__.py
+++ b/modelscope/pipelines/multi_modal/__init__.py
@@ -1 +1 @@
-from .image_captioning import ImageCaptionPipeline
+from .image_caption_pipeline import ImageCaptionPipeline
diff --git a/modelscope/pipelines/multi_modal/image_captioning.py b/modelscope/pipelines/multi_modal/image_caption_pipeline.py
similarity index 100%
rename from modelscope/pipelines/multi_modal/image_captioning.py
rename to modelscope/pipelines/multi_modal/image_caption_pipeline.py
diff --git a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
index 44d91756..1b630c10 100644
--- a/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
+++ b/modelscope/pipelines/nlp/sentence_similarity_pipeline.py
@@ -1,8 +1,5 @@
-import os
-import uuid
 from typing import Any, Dict, Union
 
-import json
 import numpy as np
 
 from modelscope.models.nlp import SbertForSentenceSimilarity
diff --git a/modelscope/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
index 9d2e4273..1dbe2efd 100644
--- a/modelscope/pipelines/nlp/sequence_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py
@@ -1,8 +1,5 @@
-import os
-import uuid
 from typing import Any, Dict, Union
 
-import json
 import numpy as np
 
 from modelscope.models.nlp import BertForSequenceClassification
diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
index 49aa112a..1cc08a38 100644
--- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
@@ -1,7 +1,5 @@
 from typing import Any, Dict, Optional, Union
 
-import numpy as np
-
 from modelscope.models import Model
 from modelscope.models.nlp import StructBertForTokenClassification
 from modelscope.preprocessors import TokenClassifcationPreprocessor

From 99fb50369544c244f1045bc880b6a04f300506bd Mon Sep 17 00:00:00 2001
From: "hemu.zp" <hemu.zp@alibaba-inc.com>
Date: Mon, 20 Jun 2022 16:00:31 +0800
Subject: [PATCH 2/6] [to #42322933] Add Palm2.0 model.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

接入支持中英文的 Palm2.0 模型，复用 text-generation-pipeline

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9066550
---
 .../models/nlp/palm_for_text_generation.py    | 25 +++----
 modelscope/pipelines/builder.py               |  3 +-
 .../pipelines/nlp/text_generation_pipeline.py | 34 +++++----
 modelscope/preprocessors/nlp.py               | 11 ++-
 requirements/nlp.txt                          |  2 +-
 requirements/runtime.txt                      |  2 +-
 tests/pipelines/test_text_generation.py       | 72 ++++++++++++-------
 7 files changed, 83 insertions(+), 66 deletions(-)

diff --git a/modelscope/models/nlp/palm_for_text_generation.py b/modelscope/models/nlp/palm_for_text_generation.py
index ffba7265..e5799feb 100644
--- a/modelscope/models/nlp/palm_for_text_generation.py
+++ b/modelscope/models/nlp/palm_for_text_generation.py
@@ -7,7 +7,7 @@ from ..builder import MODELS
 __all__ = ['PalmForTextGeneration']
 
 
-@MODELS.register_module(Tasks.text_generation, module_name=r'palm')
+@MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0')
 class PalmForTextGeneration(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -18,35 +18,26 @@ class PalmForTextGeneration(Model):
             model_cls (Optional[Any], optional): model loader, if None, use the
                 default loader to load model weights, by default None.
         """
-        from sofa import PalmTokenizer
-
         super().__init__(model_dir, *args, **kwargs)
         self.model_dir = model_dir
 
-        from sofa.models.palm import PalmForConditionalGeneration, TextGenerator
-        tokenizer = kwargs.pop('tokenizer',
-                               PalmTokenizer.from_pretrained(model_dir))
+        from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator
         model = PalmForConditionalGeneration.from_pretrained(model_dir)
-        self.generator = TextGenerator(model, tokenizer)
+        self.tokenizer = model.tokenizer
+        self.generator = Translator(model)
 
     def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
         """return the result by the model
 
         Args:
-            input (Dict[str, Any]): the preprocessed data
+            input (Dict[str, Tensor]): the preprocessed data
 
         Returns:
-            Dict[str, np.ndarray]: results
+            Dict[str, Tensor]: results
                 Example:
                     {
-                        'predictions': array([1]), # lable 0-negative 1-positive
-                        'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32),
-                        'logits': array([[-0.53860897,  1.5029076 ]], dtype=float32) # true value
+                        'predictions': Tensor([[1377, 4959, 2785, 6392...]]), # token ids, to be decoded by the tokenizer
                     }
         """
 
-        encoder_inputs = [
-            input['input_ids'], input['token_type_ids'],
-            input['attention_mask']
-        ]
-        return self.generator(encoder_inputs)
+        return self.generator(**input)
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index c24a7c3e..6e2c791d 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -22,7 +22,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'),
     Tasks.text_classification:
     ('bert-sentiment-analysis', 'damo/bert-base-sst2'),
-    Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'),
+    Tasks.text_generation: ('palm2.0',
+                            'damo/nlp_palm2.0_text-generation_chinese-base'),
     Tasks.image_captioning: ('ofa', None),
     Tasks.image_generation:
     ('person-image-cartoon',
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
index 8b6bf8a9..881e7ea6 100644
--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -10,7 +10,7 @@ from ..builder import PIPELINES
 __all__ = ['TextGenerationPipeline']
 
 
-@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm')
+@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0')
 class TextGenerationPipeline(Pipeline):
 
     def __init__(self,
@@ -23,15 +23,16 @@ class TextGenerationPipeline(Pipeline):
             model (SequenceClassificationModel): a model instance
             preprocessor (SequenceClassificationPreprocessor): a preprocessor instance
         """
-        sc_model = model if isinstance(
+        model = model if isinstance(
             model, PalmForTextGeneration) else Model.from_pretrained(model)
         if preprocessor is None:
             preprocessor = TextGenerationPreprocessor(
-                sc_model.model_dir,
+                model.model_dir,
+                model.tokenizer,
                 first_sequence='sentence',
                 second_sequence=None)
-        super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
-        self.tokenizer = preprocessor.tokenizer
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.tokenizer = model.tokenizer
 
     def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
         """process the prediction results
@@ -42,17 +43,20 @@ class TextGenerationPipeline(Pipeline):
         Returns:
             Dict[str, str]: the prediction results
         """
+        import re
+
+        # Special tokens are literal substrings (str.replace is not a regex);
+        # runs of spaces are collapsed once at the end with re.sub.
+        replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''),
+                               ('[unused1]', ''), ('[SEP]', ''),
+                               ('[unused2]', ''), ('[CLS]', ''), ('[UNK]', ''))
+        replace_tokens_roberta = (('<mask>', '<q>'), ('<pad>', ''),
+                                  ('<s>', ''), ('</s>', ''), ('<unk>', ' '))
 
-        vocab_size = len(self.tokenizer.vocab)
         pred_list = inputs['predictions']
         pred_ids = pred_list[0][0].cpu().numpy().tolist()
-        for j in range(len(pred_ids)):
-            if pred_ids[j] >= vocab_size:
-                pred_ids[j] = 100
-        pred = self.tokenizer.convert_ids_to_tokens(pred_ids)
-        pred_string = ''.join(pred).replace(
-            '##',
-            '').split('[SEP]')[0].replace('[CLS]',
-                                          '').replace('[SEP]',
-                                                      '').replace('[UNK]', '')
+        pred_string = self.tokenizer.decode(pred_ids)
+        for _old, _new in replace_tokens_bert + replace_tokens_roberta:
+            pred_string = pred_string.replace(_old, _new)
+        pred_string = re.sub(r' +', ' ', pred_string).strip()
         return {'text': pred_string}
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 6a4a25fc..9bcaa87c 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -115,17 +115,15 @@ class SequenceClassificationPreprocessor(Preprocessor):
         return rst
 
 
-@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm')
+@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0')
 class TextGenerationPreprocessor(Preprocessor):
 
-    def __init__(self, model_dir: str, *args, **kwargs):
+    def __init__(self, model_dir: str, tokenizer, *args, **kwargs):
         """preprocess the data using the vocab.txt from the `model_dir` path
 
         Args:
             model_dir (str): model path
         """
-        from sofa import PalmTokenizer
-
         super().__init__(*args, **kwargs)
 
         self.model_dir: str = model_dir
@@ -134,7 +132,7 @@ class TextGenerationPreprocessor(Preprocessor):
         self.second_sequence: str = kwargs.pop('second_sequence',
                                                'second_sequence')
         self.sequence_length: int = kwargs.pop('sequence_length', 128)
-        self.tokenizer = PalmTokenizer.from_pretrained(model_dir)
+        self.tokenizer = tokenizer
 
     @type_assert(object, str)
     def __call__(self, data: str) -> Dict[str, Any]:
@@ -153,7 +151,7 @@ class TextGenerationPreprocessor(Preprocessor):
         new_data = {self.first_sequence: data}
         # preprocess the data for the model input
 
-        rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []}
+        rst = {'input_ids': [], 'attention_mask': []}
 
         max_seq_length = self.sequence_length
 
@@ -168,7 +166,6 @@ class TextGenerationPreprocessor(Preprocessor):
 
         rst['input_ids'].append(feature['input_ids'])
         rst['attention_mask'].append(feature['attention_mask'])
-        rst['token_type_ids'].append(feature['token_type_ids'])
 
         return {k: torch.tensor(v) for k, v in rst.items()}
 
diff --git a/requirements/nlp.txt b/requirements/nlp.txt
index 8de83798..4e146a81 100644
--- a/requirements/nlp.txt
+++ b/requirements/nlp.txt
@@ -1 +1 @@
-https://alinlp.alibaba-inc.com/pypi/sofa-1.0.1.3-py3-none-any.whl
+https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index dd5616a2..e97352aa 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,7 +1,7 @@
 addict
 datasets
 easydict
-https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.2.dev0-py3-none-any.whl
+https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl
 numpy
 opencv-python-headless
 Pillow>=6.2.0
diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py
index 39d57ff7..fbdd165f 100644
--- a/tests/pipelines/test_text_generation.py
+++ b/tests/pipelines/test_text_generation.py
@@ -12,43 +12,67 @@ from modelscope.utils.test_utils import test_level
 
 
 class TextGenerationTest(unittest.TestCase):
-    model_id = 'damo/nlp_palm_text-generation_chinese'
-    input1 = "今日天气类型='晴'&温度变化趋势='大幅上升'&最低气温='28℃'&最高气温='31℃'&体感='湿热'"
-    input2 = "今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'"
+    model_id_zh = 'damo/nlp_palm2.0_text-generation_chinese-base'
+    model_id_en = 'damo/nlp_palm2.0_text-generation_english-base'
+    input_zh = """
+    本文总结了十个可穿戴产品的设计原则，而这些原则，同样也是笔者认为是这个行业最吸引人的地方：
+    1.为人们解决重复性问题；2.从人开始，而不是从机器开始；3.要引起注意，但不要刻意；4.提升用户能力，而不是取代
+    """
+    input_en = """
+    The Director of Public Prosecutions who let off Lord Janner over alleged child sex abuse started
+    her career at a legal chambers when the disgraced Labour peer was a top QC there . Alison Saunders ,
+    54 , sparked outrage last week when she decided the 86-year-old should not face astring of charges
+    of paedophilia against nine children because he has dementia . Today , newly-released documents
+    revealed damning evidence that abuse was covered up by police andsocial workers for more than 20 years .
+    And now it has emerged Mrs Saunders ' law career got off to a flying start when she secured her
+    pupillage -- a barrister 's training contract at 1 Garden Court Chambers in London in 1983 .
+    """
 
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run(self):
-        cache_path = snapshot_download(self.model_id)
-        preprocessor = TextGenerationPreprocessor(
-            cache_path, first_sequence='sentence', second_sequence=None)
-        model = PalmForTextGeneration(
-            cache_path, tokenizer=preprocessor.tokenizer)
-        pipeline1 = TextGenerationPipeline(model, preprocessor)
-        pipeline2 = pipeline(
-            Tasks.text_generation, model=model, preprocessor=preprocessor)
-        print(f'input: {self.input1}\npipeline1: {pipeline1(self.input1)}')
-        print()
-        print(f'input: {self.input2}\npipeline2: {pipeline2(self.input2)}')
+        for model_id, input in ((self.model_id_zh, self.input_zh),
+                                (self.model_id_en, self.input_en)):
+            cache_path = snapshot_download(model_id)
+            model = PalmForTextGeneration(cache_path)
+            preprocessor = TextGenerationPreprocessor(
+                cache_path,
+                model.tokenizer,
+                first_sequence='sentence',
+                second_sequence=None)
+            pipeline1 = TextGenerationPipeline(model, preprocessor)
+            pipeline2 = pipeline(
+                Tasks.text_generation, model=model, preprocessor=preprocessor)
+            print(
+                f'pipeline1: {pipeline1(input)}\npipeline2: {pipeline2(input)}'
+            )
 
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_model_from_modelhub(self):
-        model = Model.from_pretrained(self.model_id)
-        preprocessor = TextGenerationPreprocessor(
-            model.model_dir, first_sequence='sentence', second_sequence=None)
-        pipeline_ins = pipeline(
-            task=Tasks.text_generation, model=model, preprocessor=preprocessor)
-        print(pipeline_ins(self.input1))
+        for model_id, input in ((self.model_id_zh, self.input_zh),
+                                (self.model_id_en, self.input_en)):
+            model = Model.from_pretrained(model_id)
+            preprocessor = TextGenerationPreprocessor(
+                model.model_dir,
+                model.tokenizer,
+                first_sequence='sentence',
+                second_sequence=None)
+            pipeline_ins = pipeline(
+                task=Tasks.text_generation,
+                model=model,
+                preprocessor=preprocessor)
+            print(pipeline_ins(input))
 
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_model_name(self):
-        pipeline_ins = pipeline(
-            task=Tasks.text_generation, model=self.model_id)
-        print(pipeline_ins(self.input2))
+        for model_id, input in ((self.model_id_zh, self.input_zh),
+                                (self.model_id_en, self.input_en)):
+            pipeline_ins = pipeline(task=Tasks.text_generation, model=model_id)
+            print(pipeline_ins(input))
 
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_default_model(self):
         pipeline_ins = pipeline(task=Tasks.text_generation)
-        print(pipeline_ins(self.input2))
+        print(pipeline_ins(self.input_zh))
 
 
 if __name__ == '__main__':

From c99f3a9b8c0ede1578ebf0e32826a622f1c488ee Mon Sep 17 00:00:00 2001
From: ly119399 <ly119399@alibaba-inc.com>
Date: Mon, 20 Jun 2022 16:03:50 +0800
Subject: [PATCH 3/6] dialog modeling ready

---
 modelscope/utils/constant.py                | 2 +-
 tests/pipelines/nlp/test_dialog_modeling.py | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index 7fbbb190..20ef117b 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -38,7 +38,7 @@ class Tasks(object):
     token_classification = 'token-classification'
     conversational = 'conversational'
     text_generation = 'text-generation'
-    dialog_modeling = 'dialog_modeling'
+    dialog_modeling = 'dialog-modeling'
     dialog_intent_prediction = 'dialog-intent-prediction'
     table_question_answering = 'table-question-answering'
     feature_extraction = 'feature-extraction'
diff --git a/tests/pipelines/nlp/test_dialog_modeling.py b/tests/pipelines/nlp/test_dialog_modeling.py
index 855bdff4..7d4da8fe 100644
--- a/tests/pipelines/nlp/test_dialog_modeling.py
+++ b/tests/pipelines/nlp/test_dialog_modeling.py
@@ -92,10 +92,9 @@ class DialogModelingTest(unittest.TestCase):
         }
     }
 
-    # @unittest.skip('test with snapshot_download')
+    @unittest.skip('test with snapshot_download')
     def test_run(self):
 
-        # cache_path = '/Users/yangliu/Space/maas_model/nlp_space_dialog-modeling'
         cache_path = snapshot_download(self.model_id)
 
         preprocessor = DialogModelingPreprocessor(model_dir=cache_path)
@@ -124,12 +123,12 @@ class DialogModelingTest(unittest.TestCase):
 
     def test_run_with_model_from_modelhub(self):
         model = Model.from_pretrained(self.model_id)
-        preprocessor = DialogGenerationPreprocessor(model_dir=model.model_dir)
+        preprocessor = DialogModelingPreprocessor(model_dir=model.model_dir)
 
         pipelines = [
-            DialogGenerationPipeline(model=model, preprocessor=preprocessor),
+            DialogModelingPipeline(model=model, preprocessor=preprocessor),
             pipeline(
-                task=Tasks.dialog_generation,
+                task=Tasks.dialog_modeling,
                 model=model,
                 preprocessor=preprocessor)
         ]

From 6f8910dbcb5068428981a5aa5e32202b5cfdf293 Mon Sep 17 00:00:00 2001
From: ly119399 <ly119399@alibaba-inc.com>
Date: Mon, 20 Jun 2022 16:49:32 +0800
Subject: [PATCH 4/6] reformat line break in MultiWOZVocab unknown-word error

---
 modelscope/utils/nlp/space/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modelscope/utils/nlp/space/utils.py b/modelscope/utils/nlp/space/utils.py
index 822305fd..ba956b7d 100644
--- a/modelscope/utils/nlp/space/utils.py
+++ b/modelscope/utils/nlp/space/utils.py
@@ -169,8 +169,8 @@ class MultiWOZVocab(object):
         if include_oov:
             if self._word2idx.get(word, None) is None:
                 raise ValueError(
-                    'Unknown word: %s. Vocabulary should include oovs here.' %
-                    word)
+                    'Unknown word: %s. Vocabulary should include oovs here.'
+                    % word)
             return self._word2idx[word]
         else:
             word = '<unk>' if word not in self._word2idx else word

From b812cb78c9d87037769e2eb9ba59c2ee986a71da Mon Sep 17 00:00:00 2001
From: ly119399 <ly119399@alibaba-inc.com>
Date: Mon, 20 Jun 2022 17:10:54 +0800
Subject: [PATCH 5/6] merge spacy deps into requirements/nlp.txt, add en_core_web_sm

---
 requirements/nlp.txt       | 3 +++
 requirements/nlp/space.txt | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)
 delete mode 100644 requirements/nlp/space.txt

diff --git a/requirements/nlp.txt b/requirements/nlp.txt
index 4e146a81..4ec6fe04 100644
--- a/requirements/nlp.txt
+++ b/requirements/nlp.txt
@@ -1 +1,4 @@
+en_core_web_sm>=2.3.1
 https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl
+spacy>=2.3.5
+# python -m spacy download en_core_web_sm
diff --git a/requirements/nlp/space.txt b/requirements/nlp/space.txt
deleted file mode 100644
index 09a0f64e..00000000
--- a/requirements/nlp/space.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-spacy==2.3.5
-# python -m spacy download en_core_web_sm

From c6cf0d20c5c6ad729f88a30ca639df3d484c1e34 Mon Sep 17 00:00:00 2001
From: ly119399 <ly119399@alibaba-inc.com>
Date: Mon, 20 Jun 2022 17:35:53 +0800
Subject: [PATCH 6/6] install en_core_web_sm from its GitHub release tarball

---
 requirements/nlp.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/nlp.txt b/requirements/nlp.txt
index 4ec6fe04..eefb3c7d 100644
--- a/requirements/nlp.txt
+++ b/requirements/nlp.txt
@@ -1,4 +1,4 @@
-en_core_web_sm>=2.3.1
 https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl
+https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
 spacy>=2.3.5
 # python -m spacy download en_core_web_sm