diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py
index 6c3c17c0..1a4e09ac 100644
--- a/modelscope/models/nlp/__init__.py
+++ b/modelscope/models/nlp/__init__.py
@@ -1,4 +1,4 @@
 from .sentence_similarity_model import *  # noqa F403
 from .sequence_classification_model import *  # noqa F403
 from .text_generation_model import *  # noqa F403
-from .zero_shot_classification_model import *
+from .zero_shot_classification_model import *  # noqa F403
diff --git a/modelscope/models/nlp/zero_shot_classification_model.py b/modelscope/models/nlp/zero_shot_classification_model.py
index 3f658dba..7a940e40 100644
--- a/modelscope/models/nlp/zero_shot_classification_model.py
+++ b/modelscope/models/nlp/zero_shot_classification_model.py
@@ -1,6 +1,7 @@
 from typing import Any, Dict
-import torch
+
 import numpy as np
+import torch
 
 from modelscope.utils.constant import Tasks
 from ..base import Model
@@ -10,7 +11,8 @@ __all__ = ['BertForZeroShotClassification']
 
 
 @MODELS.register_module(
-    Tasks.zero_shot_classification, module_name=r'bert-zero-shot-classification')
+    Tasks.zero_shot_classification,
+    module_name=r'bert-zero-shot-classification')
 class BertForZeroShotClassification(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -40,6 +42,6 @@ class BertForZeroShotClassification(Model):
         """
         with torch.no_grad():
             outputs = self.model(**input)
-            logits = outputs["logits"].numpy()
+            logits = outputs['logits'].numpy()
         res = {'logits': logits}
         return res
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index a4f15de2..009e93d1 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -20,7 +20,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.text_classification: ('bert-sentiment-analysis',
                                 'damo/bert-base-sst2'),
     Tasks.zero_shot_classification:
-    ('bert-zero-shot-classification', 'damo/nlp_structbert_zero-shot-classification_chinese-base'),
+    ('bert-zero-shot-classification',
+     'damo/nlp_structbert_zero-shot-classification_chinese-base'),
     Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'),
     Tasks.image_captioning: ('ofa', None),
     Tasks.image_generation:
diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py
index b8a4614f..f791f9ed 100644
--- a/modelscope/pipelines/nlp/__init__.py
+++ b/modelscope/pipelines/nlp/__init__.py
@@ -1,4 +1,4 @@
 from .sentence_similarity_pipeline import *  # noqa F403
 from .sequence_classification_pipeline import *  # noqa F403
 from .text_generation_pipeline import *  # noqa F403
-from .zero_shot_classification_pipeline import *
+from .zero_shot_classification_pipeline import *  # noqa F403
diff --git a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
index b557f3f0..1ea500e2 100644
--- a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
@@ -4,6 +4,7 @@
 from typing import Any, Dict, Union
 import json
 import numpy as np
+from scipy.special import softmax
 
 from modelscope.models.nlp import BertForZeroShotClassification
 from modelscope.preprocessors import ZeroShotClassificationPreprocessor
@@ -11,7 +12,6 @@ from modelscope.utils.constant import Tasks
 from ...models import Model
 from ..base import Input, Pipeline
 from ..builder import PIPELINES
-from scipy.special import softmax
 
 __all__ = ['ZeroShotClassificationPipeline']
 
@@ -39,16 +39,15 @@ class ZeroShotClassificationPipeline(Pipeline):
 
         self.entailment_id = 0
         self.contradiction_id = 2
-        self.candidate_labels = kwargs.pop("candidate_labels")
-        self.hypothesis_template = kwargs.pop('hypothesis_template', "{}")
+        self.candidate_labels = kwargs.pop('candidate_labels')
+        self.hypothesis_template = kwargs.pop('hypothesis_template', '{}')
         self.multi_label = kwargs.pop('multi_label', False)
 
         if preprocessor is None:
             preprocessor = ZeroShotClassificationPreprocessor(
                 sc_model.model_dir,
                 candidate_labels=self.candidate_labels,
-                hypothesis_template=self.hypothesis_template
-            )
+                hypothesis_template=self.hypothesis_template)
         super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
@@ -72,7 +71,7 @@ class ZeroShotClassificationPipeline(Pipeline):
 
         reversed_index = list(reversed(scores.argsort()))
         result = {
-            "labels": [self.candidate_labels[i] for i in reversed_index],
-            "scores": [scores[i].item() for i in reversed_index],
+            'labels': [self.candidate_labels[i] for i in reversed_index],
+            'scores': [scores[i].item() for i in reversed_index],
         }
         return result
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index b1344abb..4b1348d0 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -12,8 +12,7 @@ from .builder import PREPROCESSORS
 
 __all__ = [
     'Tokenize', 'SequenceClassificationPreprocessor',
-    'TextGenerationPreprocessor',
-    "ZeroShotClassificationPreprocessor"
+    'TextGenerationPreprocessor', 'ZeroShotClassificationPreprocessor'
 ]
 
 
@@ -190,8 +189,8 @@ class ZeroShotClassificationPreprocessor(Preprocessor):
         from sofa import SbertTokenizer
         self.model_dir: str = model_dir
         self.sequence_length = kwargs.pop('sequence_length', 512)
-        self.candidate_labels = kwargs.pop("candidate_labels")
-        self.hypothesis_template = kwargs.pop('hypothesis_template', "{}")
+        self.candidate_labels = kwargs.pop('candidate_labels')
+        self.hypothesis_template = kwargs.pop('hypothesis_template', '{}')
         self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)
 
     @type_assert(object, str)
@@ -206,7 +205,8 @@ class ZeroShotClassificationPreprocessor(Preprocessor):
         Returns:
             Dict[str, Any]: the preprocessed data
         """
-        pairs = [[data, self.hypothesis_template.format(label)] for label in self.candidate_labels]
+        pairs = [[data, self.hypothesis_template.format(label)]
+                 for label in self.candidate_labels]
 
         features = self.tokenizer(
             pairs,
@@ -214,7 +214,5 @@ class ZeroShotClassificationPreprocessor(Preprocessor):
             truncation=True,
             max_length=self.sequence_length,
             return_tensors='pt',
-            truncation_strategy='only_first'
-        )
+            truncation_strategy='only_first')
         return features
-
diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py
index f55324f0..1fe69e5b 100644
--- a/tests/pipelines/test_zero_shot_classification.py
+++ b/tests/pipelines/test_zero_shot_classification.py
@@ -13,13 +13,13 @@ from modelscope.utils.constant import Tasks
 class ZeroShotClassificationTest(unittest.TestCase):
     model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base'
     sentence = '全新突破 解放军运20版空中加油机曝光'
-    candidate_labels = ["文化", "体育", "娱乐", "财经", "家居", "汽车", "教育", "科技", "军事"]
+    candidate_labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事']
 
     def test_run_from_local(self):
         cache_path = snapshot_download(self.model_id)
-        tokenizer = ZeroShotClassificationPreprocessor(cache_path, candidate_labels=self.candidate_labels)
-        model = BertForZeroShotClassification(
-            cache_path, tokenizer=tokenizer)
+        tokenizer = ZeroShotClassificationPreprocessor(
+            cache_path, candidate_labels=self.candidate_labels)
+        model = BertForZeroShotClassification(cache_path, tokenizer=tokenizer)
         pipeline1 = ZeroShotClassificationPipeline(
             model,
             preprocessor=tokenizer,
@@ -29,8 +29,7 @@ class ZeroShotClassificationTest(unittest.TestCase):
             Tasks.zero_shot_classification,
             model=model,
             preprocessor=tokenizer,
-            candidate_labels=self.candidate_labels
-        )
+            candidate_labels=self.candidate_labels)
 
         print(f'sentence: {self.sentence}\n'
               f'pipeline1:{pipeline1(input=self.sentence)}')
@@ -40,21 +39,20 @@
 
     def test_run_with_model_from_modelhub(self):
         model = Model.from_pretrained(self.model_id)
-        tokenizer = ZeroShotClassificationPreprocessor(model.model_dir, candidate_labels=self.candidate_labels)
+        tokenizer = ZeroShotClassificationPreprocessor(
+            model.model_dir, candidate_labels=self.candidate_labels)
         pipeline_ins = pipeline(
             task=Tasks.zero_shot_classification,
             model=model,
             preprocessor=tokenizer,
-            candidate_labels=self.candidate_labels
-        )
+            candidate_labels=self.candidate_labels)
         print(pipeline_ins(input=self.sentence))
 
     def test_run_with_model_name(self):
         pipeline_ins = pipeline(
             task=Tasks.zero_shot_classification,
             model=self.model_id,
-            candidate_labels=self.candidate_labels
-        )
+            candidate_labels=self.candidate_labels)
         print(pipeline_ins(input=self.sentence))
 
     def test_run_with_default_model(self):
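
Usage note (not part of the patch): a minimal sketch of how the zero-shot classification pipeline registered above is expected to be called, based on the tests added in tests/pipelines/test_zero_shot_classification.py. It assumes pipeline() is importable from modelscope.pipelines, as the rest of the repo does; candidate_labels is required, and hypothesis_template defaults to '{}'.

# Minimal sketch; model id is the default registered in builder.py in this patch.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Labels to score against the input; the preprocessor pairs the input with
# each label rendered through hypothesis_template as an entailment hypothesis.
candidate_labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事']

classifier = pipeline(
    task=Tasks.zero_shot_classification,
    model='damo/nlp_structbert_zero-shot-classification_chinese-base',
    candidate_labels=candidate_labels)

# postprocess() returns labels sorted by descending score:
# {'labels': [...], 'scores': [...]}
print(classifier(input='全新突破 解放军运20版空中加油机曝光'))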