Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10931263master^2
| @@ -40,6 +40,7 @@ if TYPE_CHECKING: | |||||
| InformationExtractionModel, | InformationExtractionModel, | ||||
| LSTMCRFForNamedEntityRecognition, | LSTMCRFForNamedEntityRecognition, | ||||
| LSTMCRFForWordSegmentation, | LSTMCRFForWordSegmentation, | ||||
| LSTMCRFForPartOfSpeech, | |||||
| SequenceClassificationModel, | SequenceClassificationModel, | ||||
| SingleBackboneTaskModelBase, | SingleBackboneTaskModelBase, | ||||
| TaskModelForTextGeneration, | TaskModelForTextGeneration, | ||||
| @@ -95,6 +96,7 @@ else: | |||||
| 'InformationExtractionModel', | 'InformationExtractionModel', | ||||
| 'LSTMCRFForNamedEntityRecognition', | 'LSTMCRFForNamedEntityRecognition', | ||||
| 'LSTMCRFForWordSegmentation', | 'LSTMCRFForWordSegmentation', | ||||
| 'LSTMCRFForPartOfSpeech', | |||||
| 'SequenceClassificationModel', | 'SequenceClassificationModel', | ||||
| 'SingleBackboneTaskModelBase', | 'SingleBackboneTaskModelBase', | ||||
| 'TaskModelForTextGeneration', | 'TaskModelForTextGeneration', | ||||
| @@ -421,9 +421,9 @@ class MOELayer(Base): | |||||
| self.use_expert_residual_network = use_expert_residual_network | self.use_expert_residual_network = use_expert_residual_network | ||||
| if self.use_expert_residual_network: | if self.use_expert_residual_network: | ||||
| self.expert_network = nn.Sequential( | |||||
| *([ExpertResidualLayer(self.gate.model_dim) | |||||
| for _ in range(6)])) | |||||
| self.expert_network = nn.Sequential(*([ | |||||
| ExpertResidualLayer(self.gate.model_dim) for _ in range(6) | |||||
| ])) # noqa | |||||
| self.use_tutel = use_tutel and TUTEL_INSTALLED | self.use_tutel = use_tutel and TUTEL_INSTALLED | ||||
| @@ -10,6 +10,7 @@ if TYPE_CHECKING: | |||||
| from .nncrf_for_named_entity_recognition import ( | from .nncrf_for_named_entity_recognition import ( | ||||
| LSTMCRFForNamedEntityRecognition, | LSTMCRFForNamedEntityRecognition, | ||||
| LSTMCRFForWordSegmentation, | LSTMCRFForWordSegmentation, | ||||
| LSTMCRFForPartOfSpeech, | |||||
| TransformerCRFForNamedEntityRecognition, | TransformerCRFForNamedEntityRecognition, | ||||
| TransformerCRFForWordSegmentation, | TransformerCRFForWordSegmentation, | ||||
| ) | ) | ||||
| @@ -26,6 +27,7 @@ else: | |||||
| 'nncrf_for_named_entity_recognition': [ | 'nncrf_for_named_entity_recognition': [ | ||||
| 'LSTMCRFForNamedEntityRecognition', | 'LSTMCRFForNamedEntityRecognition', | ||||
| 'LSTMCRFForWordSegmentation', | 'LSTMCRFForWordSegmentation', | ||||
| 'LSTMCRFForPartOfSpeech', | |||||
| 'TransformerCRFForNamedEntityRecognition', | 'TransformerCRFForNamedEntityRecognition', | ||||
| 'TransformerCRFForWordSegmentation', | 'TransformerCRFForWordSegmentation', | ||||
| ], | ], | ||||
| @@ -17,7 +17,8 @@ from modelscope.utils.constant import ModelFile, Tasks | |||||
| __all__ = [ | __all__ = [ | ||||
| 'TransformerCRFForNamedEntityRecognition', | 'TransformerCRFForNamedEntityRecognition', | ||||
| 'LSTMCRFForNamedEntityRecognition' | |||||
| 'LSTMCRFForNamedEntityRecognition', 'LSTMCRFForWordSegmentation', | |||||
| 'LSTMCRFForPartOfSpeech' | |||||
| ] | ] | ||||
| @@ -193,10 +194,16 @@ class LSTMCRFForNamedEntityRecognition( | |||||
| @MODELS.register_module(Tasks.word_segmentation, module_name=Models.lcrf_wseg) | @MODELS.register_module(Tasks.word_segmentation, module_name=Models.lcrf_wseg) | ||||
| @MODELS.register_module(Tasks.word_segmentation, module_name=Models.lcrf) | |||||
| class LSTMCRFForWordSegmentation(LSTMCRFForNamedEntityRecognition): | class LSTMCRFForWordSegmentation(LSTMCRFForNamedEntityRecognition): | ||||
| pass | pass | ||||
| @MODELS.register_module(Tasks.part_of_speech, module_name=Models.lcrf) | |||||
| class LSTMCRFForPartOfSpeech(LSTMCRFForNamedEntityRecognition): | |||||
| pass | |||||
| class TransformerCRF(nn.Module): | class TransformerCRF(nn.Module): | ||||
| """A transformer based model to NER tasks. | """A transformer based model to NER tasks. | ||||
| @@ -123,6 +123,10 @@ PREPROCESSOR_MAP = { | |||||
| # taskmodels | # taskmodels | ||||
| (Models.lcrf, Tasks.named_entity_recognition): | (Models.lcrf, Tasks.named_entity_recognition): | ||||
| Preprocessors.sequence_labeling_tokenizer, | Preprocessors.sequence_labeling_tokenizer, | ||||
| (Models.lcrf, Tasks.word_segmentation): | |||||
| Preprocessors.sequence_labeling_tokenizer, | |||||
| (Models.lcrf, Tasks.part_of_speech): | |||||
| Preprocessors.sequence_labeling_tokenizer, | |||||
| (Models.lcrf_wseg, Tasks.word_segmentation): | (Models.lcrf_wseg, Tasks.word_segmentation): | ||||
| Preprocessors.sequence_labeling_tokenizer, | Preprocessors.sequence_labeling_tokenizer, | ||||
| (Models.tcrf_wseg, Tasks.word_segmentation): | (Models.tcrf_wseg, Tasks.word_segmentation): | ||||
| @@ -4,7 +4,8 @@ import unittest | |||||
| from modelscope.hub.snapshot_download import snapshot_download | from modelscope.hub.snapshot_download import snapshot_download | ||||
| from modelscope.models import Model | from modelscope.models import Model | ||||
| from modelscope.models.nlp import TokenClassificationModel | |||||
| from modelscope.models.nlp import (LSTMCRFForPartOfSpeech, | |||||
| TokenClassificationModel) | |||||
| from modelscope.pipelines import pipeline | from modelscope.pipelines import pipeline | ||||
| from modelscope.pipelines.nlp import TokenClassificationPipeline | from modelscope.pipelines.nlp import TokenClassificationPipeline | ||||
| from modelscope.preprocessors import \ | from modelscope.preprocessors import \ | ||||
| @@ -15,6 +16,7 @@ from modelscope.utils.test_utils import test_level | |||||
| class PartOfSpeechTest(unittest.TestCase): | class PartOfSpeechTest(unittest.TestCase): | ||||
| model_id = 'damo/nlp_structbert_part-of-speech_chinese-lite' | model_id = 'damo/nlp_structbert_part-of-speech_chinese-lite' | ||||
| lstmcrf_news_model_id = 'damo/nlp_lstmcrf_part-of-speech_chinese-news' | |||||
| sentence = '今天天气不错,适合出去游玩' | sentence = '今天天气不错,适合出去游玩' | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| @@ -30,7 +32,20 @@ class PartOfSpeechTest(unittest.TestCase): | |||||
| print() | print() | ||||
| print(f'pipeline2: {pipeline2(input=self.sentence)}') | print(f'pipeline2: {pipeline2(input=self.sentence)}') | ||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_lstmcrf_news_by_direct_model_download(self): | |||||
| cache_path = snapshot_download(self.lstmcrf_news_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor(cache_path) | |||||
| model = LSTMCRFForPartOfSpeech.from_pretrained(cache_path) | |||||
| pipeline1 = TokenClassificationPipeline(model, preprocessor=tokenizer) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.part_of_speech, model=model, preprocessor=tokenizer) | |||||
| print(f'sentence: {self.sentence}\n' | |||||
| f'pipeline1:{pipeline1(input=self.sentence)}') | |||||
| print() | |||||
| print(f'pipeline2: {pipeline2(input=self.sentence)}') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_with_model_from_modelhub(self): | def test_run_with_model_from_modelhub(self): | ||||
| model = Model.from_pretrained(self.model_id) | model = Model.from_pretrained(self.model_id) | ||||
| tokenizer = TokenClassificationTransformersPreprocessor( | tokenizer = TokenClassificationTransformersPreprocessor( | ||||
| @@ -40,11 +55,26 @@ class PartOfSpeechTest(unittest.TestCase): | |||||
| print(pipeline_ins(input=self.sentence)) | print(pipeline_ins(input=self.sentence)) | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| def test_run_lstmcrf_news_with_model_from_modelhub(self): | |||||
| model = Model.from_pretrained(self.lstmcrf_news_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor( | |||||
| model.model_dir) | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.part_of_speech, model=model, preprocessor=tokenizer) | |||||
| print(pipeline_ins(input=self.sentence)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_with_model_name(self): | def test_run_with_model_name(self): | ||||
| pipeline_ins = pipeline(task=Tasks.part_of_speech, model=self.model_id) | pipeline_ins = pipeline(task=Tasks.part_of_speech, model=self.model_id) | ||||
| print(pipeline_ins(input=self.sentence)) | print(pipeline_ins(input=self.sentence)) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_lstmcrf_new_with_model_name(self): | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.part_of_speech, model=self.lstmcrf_news_model_id) | |||||
| print(pipeline_ins(input=self.sentence)) | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_with_default_model(self): | def test_run_with_default_model(self): | ||||
| pipeline_ins = pipeline(task=Tasks.part_of_speech) | pipeline_ins = pipeline(task=Tasks.part_of_speech) | ||||
| print(pipeline_ins(input=self.sentence)) | print(pipeline_ins(input=self.sentence)) | ||||
| @@ -3,7 +3,8 @@ import unittest | |||||
| from modelscope.hub.snapshot_download import snapshot_download | from modelscope.hub.snapshot_download import snapshot_download | ||||
| from modelscope.models import Model | from modelscope.models import Model | ||||
| from modelscope.models.nlp import SbertForTokenClassification | |||||
| from modelscope.models.nlp import (LSTMCRFForWordSegmentation, | |||||
| SbertForTokenClassification) | |||||
| from modelscope.pipelines import pipeline | from modelscope.pipelines import pipeline | ||||
| from modelscope.pipelines.nlp import WordSegmentationPipeline | from modelscope.pipelines.nlp import WordSegmentationPipeline | ||||
| from modelscope.preprocessors import \ | from modelscope.preprocessors import \ | ||||
| @@ -19,8 +20,12 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| def setUp(self) -> None: | def setUp(self) -> None: | ||||
| self.task = Tasks.word_segmentation | self.task = Tasks.word_segmentation | ||||
| self.model_id = 'damo/nlp_structbert_word-segmentation_chinese-base' | self.model_id = 'damo/nlp_structbert_word-segmentation_chinese-base' | ||||
| self.ecom_model_id = 'damo/nlp_structbert_word-segmentation_chinese-base-ecommerce' | |||||
| self.lstmcrf_news_model_id = 'damo/nlp_lstmcrf_word-segmentation_chinese-news' | |||||
| self.lstmcrf_ecom_model_id = 'damo/nlp_lstmcrf_word-segmentation_chinese-ecommerce' | |||||
| sentence = '今天天气不错,适合出去游玩' | sentence = '今天天气不错,适合出去游玩' | ||||
| sentence_ecom = '东阳草肌醇复合物' | |||||
| sentence_eng = 'I am a program.' | sentence_eng = 'I am a program.' | ||||
| regress_tool = MsRegressTool(baseline=False) | regress_tool = MsRegressTool(baseline=False) | ||||
| @@ -36,7 +41,43 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| f'pipeline1:{pipeline1(input=self.sentence)}') | f'pipeline1:{pipeline1(input=self.sentence)}') | ||||
| print(f'pipeline2: {pipeline2(input=self.sentence)}') | print(f'pipeline2: {pipeline2(input=self.sentence)}') | ||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_ecom_by_direct_model_download(self): | |||||
| cache_path = snapshot_download(self.ecom_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor(cache_path) | |||||
| model = SbertForTokenClassification.from_pretrained(cache_path) | |||||
| pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||||
| print(f'sentence: {self.sentence_ecom}\n' | |||||
| f'pipeline1:{pipeline1(input=self.sentence_ecom)}') | |||||
| print(f'pipeline2: {pipeline2(input=self.sentence_ecom)}') | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_lstmcrf_news_by_direct_model_download(self): | |||||
| cache_path = snapshot_download(self.lstmcrf_news_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor(cache_path) | |||||
| model = LSTMCRFForWordSegmentation(cache_path, tokenizer=tokenizer) | |||||
| pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||||
| print(f'sentence: {self.sentence}\n' | |||||
| f'pipeline1:{pipeline1(input=self.sentence)}') | |||||
| print(f'pipeline2: {pipeline2(input=self.sentence)}') | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| def test_run_lstmcrf_ecom_by_direct_model_download(self): | |||||
| cache_path = snapshot_download(self.lstmcrf_ecom_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor(cache_path) | |||||
| model = LSTMCRFForWordSegmentation(cache_path, tokenizer=tokenizer) | |||||
| pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer) | |||||
| pipeline2 = pipeline( | |||||
| Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||||
| print(f'sentence: {self.sentence_ecom}\n' | |||||
| f'pipeline1:{pipeline1(input=self.sentence_ecom)}') | |||||
| print(f'pipeline2: {pipeline2(input=self.sentence_ecom)}') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_with_model_from_modelhub(self): | def test_run_with_model_from_modelhub(self): | ||||
| model = Model.from_pretrained(self.model_id) | model = Model.from_pretrained(self.model_id) | ||||
| tokenizer = TokenClassificationTransformersPreprocessor( | tokenizer = TokenClassificationTransformersPreprocessor( | ||||
| @@ -46,6 +87,33 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| print(pipeline_ins(input=self.sentence)) | print(pipeline_ins(input=self.sentence)) | ||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| def test_run_ecom_with_model_from_modelhub(self): | |||||
| model = Model.from_pretrained(self.ecom_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor( | |||||
| model.model_dir) | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||||
| print(pipeline_ins(input=self.sentence_ecom)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_lstmcrf_news_with_model_from_modelhub(self): | |||||
| model = Model.from_pretrained(self.lstmcrf_news_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor( | |||||
| model.model_dir) | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||||
| print(pipeline_ins(input=self.sentence)) | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_lstmcrf_ecom_with_model_from_modelhub(self): | |||||
| model = Model.from_pretrained(self.lstmcrf_ecom_model_id) | |||||
| tokenizer = TokenClassificationTransformersPreprocessor( | |||||
| model.model_dir) | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||||
| print(pipeline_ins(input=self.sentence_ecom)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_with_model_name(self): | def test_run_with_model_name(self): | ||||
| pipeline_ins = pipeline( | pipeline_ins = pipeline( | ||||
| task=Tasks.word_segmentation, model=self.model_id) | task=Tasks.word_segmentation, model=self.model_id) | ||||
| @@ -56,6 +124,24 @@ class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| print(pipeline_ins(input=self.sentence)) | print(pipeline_ins(input=self.sentence)) | ||||
| print(pipeline_ins(input=self.sentence_eng)) | print(pipeline_ins(input=self.sentence_eng)) | ||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_ecom_with_model_name(self): | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.word_segmentation, model=self.ecom_model_id) | |||||
| print(pipeline_ins(input=self.sentence_ecom)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_lstmcrf_news_with_model_name(self): | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.word_segmentation, model=self.lstmcrf_news_model_id) | |||||
| print(pipeline_ins(input=self.sentence)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_lstmcrf_ecom_with_model_name(self): | |||||
| pipeline_ins = pipeline( | |||||
| task=Tasks.word_segmentation, model=self.lstmcrf_ecom_model_id) | |||||
| print(pipeline_ins(input=self.sentence_ecom)) | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_run_with_model_name_batch(self): | def test_run_with_model_name_batch(self): | ||||
| pipeline_ins = pipeline( | pipeline_ins = pipeline( | ||||