Browse Source

register multiple models on model and pipeline

master
智丞 3 years ago
parent
commit
951077c729
4 changed files with 87 additions and 61 deletions
  1. +9
    -8
      modelscope/models/nlp/masked_language_model.py
  2. +2
    -2
      modelscope/pipelines/builder.py
  3. +18
    -11
      modelscope/pipelines/nlp/fill_mask_pipeline.py
  4. +58
    -40
      tests/pipelines/test_fill_mask.py

+ 9
- 8
modelscope/models/nlp/masked_language_model.py View File

@@ -1,14 +1,16 @@
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union


import numpy as np import numpy as np

from ...utils.constant import Tasks
from ..base import Model, Tensor from ..base import Model, Tensor
from ..builder import MODELS from ..builder import MODELS
from ...utils.constant import Tasks


__all__ = ['MaskedLanguageModel'] __all__ = ['MaskedLanguageModel']




@MODELS.register_module(Tasks.fill_mask, module_name=r'sbert') @MODELS.register_module(Tasks.fill_mask, module_name=r'sbert')
@MODELS.register_module(Tasks.fill_mask, module_name=r'veco')
class MaskedLanguageModel(Model): class MaskedLanguageModel(Model):


def __init__(self, model_dir: str, *args, **kwargs): def __init__(self, model_dir: str, *args, **kwargs):
@@ -17,8 +19,8 @@ class MaskedLanguageModel(Model):
super().__init__(model_dir, *args, **kwargs) super().__init__(model_dir, *args, **kwargs)


self.config = AutoConfig.from_pretrained(model_dir) self.config = AutoConfig.from_pretrained(model_dir)
self.model = AutoModelForMaskedLM.from_pretrained(model_dir, config=self.config)
self.model = AutoModelForMaskedLM.from_pretrained(
model_dir, config=self.config)


def forward(self, inputs: Dict[str, Tensor]) -> Dict[str, np.ndarray]: def forward(self, inputs: Dict[str, Tensor]) -> Dict[str, np.ndarray]:
"""return the result by the model """return the result by the model
@@ -35,9 +37,8 @@ class MaskedLanguageModel(Model):
'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
} }
""" """
rst = self.model(
input_ids=inputs["input_ids"],
attention_mask=inputs['attention_mask'],
token_type_ids=inputs["token_type_ids"]
)
rst = self.model(
input_ids=inputs['input_ids'],
attention_mask=inputs['attention_mask'],
token_type_ids=inputs['token_type_ids'])
return {'logits': rst['logits'], 'input_ids': inputs['input_ids']} return {'logits': rst['logits'], 'input_ids': inputs['input_ids']}

+ 2
- 2
modelscope/pipelines/builder.py View File

@@ -24,8 +24,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
Tasks.image_generation: Tasks.image_generation:
('person-image-cartoon', ('person-image-cartoon',
'damo/cv_unet_person-image-cartoon_compound-models'), 'damo/cv_unet_person-image-cartoon_compound-models'),
Tasks.fill_mask:
('sbert')
Tasks.fill_mask: ('sbert', 'damo/nlp_structbert_fill-mask_chinese-large'),
Tasks.fill_mask: ('veco', 'damo/nlp_veco_fill-mask_large')
} }






+ 18
- 11
modelscope/pipelines/nlp/fill_mask_pipeline.py View File

@@ -10,6 +10,7 @@ __all__ = ['FillMaskPipeline']




@PIPELINES.register_module(Tasks.fill_mask, module_name=r'sbert') @PIPELINES.register_module(Tasks.fill_mask, module_name=r'sbert')
@PIPELINES.register_module(Tasks.fill_mask, module_name=r'veco')
class FillMaskPipeline(Pipeline): class FillMaskPipeline(Pipeline):


def __init__(self, model: MaskedLanguageModel, def __init__(self, model: MaskedLanguageModel,
@@ -36,22 +37,28 @@ class FillMaskPipeline(Pipeline):
Dict[str, str]: the prediction results Dict[str, str]: the prediction results
""" """
import numpy as np import numpy as np
logits = inputs["logits"].detach().numpy()
input_ids = inputs["input_ids"].detach().numpy()
logits = inputs['logits'].detach().numpy()
input_ids = inputs['input_ids'].detach().numpy()
pred_ids = np.argmax(logits, axis=-1) pred_ids = np.argmax(logits, axis=-1)
rst_ids = np.where(input_ids==self.mask_id[self.model.config.model_type], pred_ids, input_ids)
rst_ids = np.where(
input_ids == self.mask_id[self.model.config.model_type], pred_ids,
input_ids)
pred_strings = [] pred_strings = []
for ids in rst_ids: for ids in rst_ids:
if self.model.config.model_type == 'veco': if self.model.config.model_type == 'veco':
pred_string = self.tokenizer.decode(ids).split('</s>')[0].replace("<s>", "").replace("</s>", "").replace("<pad>", "")
elif self.model.config.vocab_size == 21128: # zh bert
pred_string = self.tokenizer.decode(ids).split(
'</s>')[0].replace('<s>',
'').replace('</s>',
'').replace('<pad>', '')
elif self.model.config.vocab_size == 21128: # zh bert
pred_string = self.tokenizer.convert_ids_to_tokens(ids) pred_string = self.tokenizer.convert_ids_to_tokens(ids)
pred_string = ''.join(pred_string).replace('##','')
pred_string = pred_string.split('[SEP]')[0].replace('[CLS]', '').replace('[SEP]', '').replace('[UNK]', '')
else: ## en bert
pred_string = ''.join(pred_string).replace('##', '')
pred_string = pred_string.split('[SEP]')[0].replace(
'[CLS]', '').replace('[SEP]', '').replace('[UNK]', '')
else: ## en bert
pred_string = self.tokenizer.decode(ids) pred_string = self.tokenizer.decode(ids)
pred_string = pred_string.split('[SEP]')[0].replace('[CLS]', '').replace('[SEP]', '').replace('[UNK]', '')
pred_string = pred_string.split('[SEP]')[0].replace(
'[CLS]', '').replace('[SEP]', '').replace('[UNK]', '')
pred_strings.append(pred_string) pred_strings.append(pred_string)


return {'pred_string': pred_strings}

return {'pred_string': pred_strings}

+ 58
- 40
tests/pipelines/test_fill_mask.py View File

@@ -5,24 +5,41 @@ import unittest


from maas_hub.snapshot_download import snapshot_download from maas_hub.snapshot_download import snapshot_download


from modelscope.models import Model
from modelscope.models.nlp import MaskedLanguageModel from modelscope.models.nlp import MaskedLanguageModel
from modelscope.pipelines import FillMaskPipeline, pipeline from modelscope.pipelines import FillMaskPipeline, pipeline
from modelscope.preprocessors import FillMaskPreprocessor from modelscope.preprocessors import FillMaskPreprocessor
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.models import Model
from modelscope.utils.hub import get_model_cache_dir from modelscope.utils.hub import get_model_cache_dir
from modelscope.utils.test_utils import test_level from modelscope.utils.test_utils import test_level



class FillMaskTest(unittest.TestCase): class FillMaskTest(unittest.TestCase):
model_id_sbert = {'zh': 'damo/nlp_structbert_fill-mask-chinese_large',
'en': 'damo/nlp_structbert_fill-mask-english_large'}
model_id_sbert = {
'zh': 'damo/nlp_structbert_fill-mask-chinese_large',
'en': 'damo/nlp_structbert_fill-mask-english_large'
}
model_id_veco = 'damo/nlp_veco_fill-mask_large' model_id_veco = 'damo/nlp_veco_fill-mask_large'


ori_texts = {"zh": "段誉轻挥折扇,摇了摇头,说道:“你师父是你的师父,你师父可不是我的师父。你师父差得动你,你师父可差不动我。",
"en": "Everything in what you call reality is really just a reflection of your consciousness. Your whole universe is just a mirror reflection of your story."}
ori_texts = {
'zh':
f'段誉轻挥折扇,摇了摇头,说道:“你师父是你的师父,你师父可不是我的师父。'
f'你师父差得动你,你师父可差不动我。',
'en':
f'Everything in what you call reality is really just a r'
f'eflection of your consciousness. Your whole universe is'
f'just a mirror reflection of your story.'
}


test_inputs = {"zh": "段誉轻[MASK]折扇,摇了摇[MASK],[MASK]道:“你师父是你的[MASK][MASK],你师父可不是[MASK]的师父。你师父差得动你,你师父可[MASK]不动我。",
"en": "Everything in [MASK] you call reality is really [MASK] a reflection of your [MASK]. Your whole universe is just a mirror [MASK] of your story."}
test_inputs = {
'zh':
f'段誉轻[MASK]折扇,摇了摇[MASK],[MASK]道:“你师父是你的[MASK][MASK]'
f',你师父可不是[MASK]的师父。你师父差得动你,你师父可[MASK]不动我。',
'en':
f'Everything in [MASK] you call reality is really [MASK] a '
f'reflection of your [MASK]. Your whole universe is just a '
f'mirror [MASK] of your story.'
}


#def test_run(self): #def test_run(self):
# # sbert # # sbert
@@ -37,51 +54,52 @@ class FillMaskTest(unittest.TestCase):
# ori_text = self.ori_texts[language] # ori_text = self.ori_texts[language]
# test_input = self.test_inputs[language] # test_input = self.test_inputs[language]
# print( # print(
# f'ori_text: {ori_text}\ninput: {test_input}\npipeline1: {pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}'
# f'ori_text: {ori_text}\ninput: {test_input}\npipeline1: '
# f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}'
# ) # )


## veco
#model_dir = snapshot_download(self.model_id_veco)
#preprocessor = FillMaskPreprocessor(
# model_dir, first_sequence='sentence', second_sequence=None)
#model = MaskedLanguageModel(model_dir)
#pipeline1 = FillMaskPipeline(model, preprocessor)
#pipeline2 = pipeline(
# Tasks.fill_mask, model=model, preprocessor=preprocessor)
#for language in ["zh", "en"]:
# ori_text = self.ori_texts[language]
# test_input = self.test_inputs["zh"].replace("[MASK]", "<mask>")
# print(
# f'ori_text: {ori_text}\ninput: {test_input}\npipeline1: {pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}'
## veco
#model_dir = snapshot_download(self.model_id_veco)
#preprocessor = FillMaskPreprocessor(
# model_dir, first_sequence='sentence', second_sequence=None)
#model = MaskedLanguageModel(model_dir)
#pipeline1 = FillMaskPipeline(model, preprocessor)
#pipeline2 = pipeline(
# Tasks.fill_mask, model=model, preprocessor=preprocessor)
#for language in ["zh", "en"]:
# ori_text = self.ori_texts[language]
# test_input = self.test_inputs["zh"].replace("[MASK]", "<mask>")
# print(
# f'ori_text: {ori_text}\ninput: {test_input}\npipeline1: '
# f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}'


def test_run_with_model_from_modelhub(self): def test_run_with_model_from_modelhub(self):
for language in ["zh"]:
for language in ['zh']:
print(self.model_id_sbert[language]) print(self.model_id_sbert[language])
model = Model.from_pretrained(self.model_id_sbert[language]) model = Model.from_pretrained(self.model_id_sbert[language])
print("model", model.model_dir)
print('model', model.model_dir)
preprocessor = FillMaskPreprocessor( preprocessor = FillMaskPreprocessor(
model.model_dir, first_sequence='sentence', second_sequence=None)
model.model_dir,
first_sequence='sentence',
second_sequence=None)
pipeline_ins = pipeline( pipeline_ins = pipeline(
task=Tasks.fill_mask, model=model, preprocessor=preprocessor)
print(pipeline_ins(self_test_inputs[language]))

task=Tasks.fill_mask, model=model, preprocessor=preprocessor)
print(pipeline_ins(self.test_inputs[language]))


#def test_run_with_model_name(self): #def test_run_with_model_name(self):
## veco
#pipeline_ins = pipeline(
# task=Tasks.fill_mask, model=self.model_id_veco)
#for language in ["zh", "en"]:
# input_ = self.test_inputs[language].replace("[MASK]", "<mask>")
# print(pipeline_ins(input_))
## veco
#pipeline_ins = pipeline(
# task=Tasks.fill_mask, model=self.model_id_veco)
#for language in ["zh", "en"]:
# input_ = self.test_inputs[language].replace("[MASK]", "<mask>")
# print(pipeline_ins(input_))


## structBert
#for language in ["zh"]:
# pipeline_ins = pipeline(
# task=Tasks.fill_mask, model=self.model_id_sbert[language])
# print(pipeline_ins(self_test_inputs[language]))
## structBert
#for language in ["zh"]:
# pipeline_ins = pipeline(
# task=Tasks.fill_mask, model=self.model_id_sbert[language])
# print(pipeline_ins(self_test_inputs[language]))




if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()


Loading…
Cancel
Save