diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 1d6fd874..913589d8 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -67,8 +67,9 @@ class Models(object): space_dst = 'space-dst' space_intent = 'space-intent' space_modeling = 'space-modeling' - star = 'star' - star3 = 'star3' + space_T_en = 'space-T-en' + space_T_cn = 'space-T-cn' + tcrf = 'transformer-crf' transformer_softmax = 'transformer-softmax' lcrf = 'lstm-crf' diff --git a/modelscope/models/nlp/star3/__init__.py b/modelscope/models/nlp/space_T_cn/__init__.py similarity index 100% rename from modelscope/models/nlp/star3/__init__.py rename to modelscope/models/nlp/space_T_cn/__init__.py diff --git a/modelscope/models/nlp/star3/configuration_star3.py b/modelscope/models/nlp/space_T_cn/configuration_space_T_cn.py similarity index 91% rename from modelscope/models/nlp/star3/configuration_star3.py rename to modelscope/models/nlp/space_T_cn/configuration_space_T_cn.py index 4c5ae677..553d8592 100644 --- a/modelscope/models/nlp/star3/configuration_star3.py +++ b/modelscope/models/nlp/space_T_cn/configuration_space_T_cn.py @@ -24,8 +24,8 @@ import json logger = logging.getLogger(__name__) -class Star3Config(object): - """Configuration class to store the configuration of a `Star3Model`. +class SpaceTCnConfig(object): + """Configuration class to store the configuration of a `SpaceTCnModel`. """ def __init__(self, @@ -40,10 +40,10 @@ class Star3Config(object): max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02): - """Constructs Star3Config. + """Constructs SpaceTCnConfig. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `Star3Model`. + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `SpaceTCnModel`. hidden_size: Size of the encoder layers and the pooler layer. num_hidden_layers: Number of hidden layers in the Transformer encoder. 
num_attention_heads: Number of attention heads for each attention layer in @@ -59,7 +59,7 @@ class Star3Config(object): max_position_embeddings: The maximum sequence length that this model might ever be used with. Typically set this to something large just in case (e.g., 512 or 1024 or 2048). - type_vocab_size: The vocabulary size of the `token_type_ids` passed into `Star3Model`. + type_vocab_size: The vocabulary size of the `token_type_ids` passed into `SpaceTCnModel`. initializer_range: The sttdev of the truncated_normal_initializer for initializing all weight matrices. """ @@ -89,15 +89,15 @@ class Star3Config(object): @classmethod def from_dict(cls, json_object): - """Constructs a `Star3Config` from a Python dictionary of parameters.""" - config = Star3Config(vocab_size_or_config_json_file=-1) + """Constructs a `SpaceTCnConfig` from a Python dictionary of parameters.""" + config = SpaceTCnConfig(vocab_size_or_config_json_file=-1) for key, value in json_object.items(): config.__dict__[key] = value return config @classmethod def from_json_file(cls, json_file): - """Constructs a `Star3Config` from a json file of parameters.""" + """Constructs a `SpaceTCnConfig` from a json file of parameters.""" with open(json_file, 'r', encoding='utf-8') as reader: text = reader.read() return cls.from_dict(json.loads(text)) diff --git a/modelscope/models/nlp/star3/modeling_star3.py b/modelscope/models/nlp/space_T_cn/modeling_space_T_cn.py similarity index 98% rename from modelscope/models/nlp/star3/modeling_star3.py rename to modelscope/models/nlp/space_T_cn/modeling_space_T_cn.py index 13f7136a..72c94724 100644 --- a/modelscope/models/nlp/star3/modeling_star3.py +++ b/modelscope/models/nlp/space_T_cn/modeling_space_T_cn.py @@ -27,7 +27,8 @@ import numpy as np import torch from torch import nn -from modelscope.models.nlp.star3.configuration_star3 import Star3Config +from modelscope.models.nlp.space_T_cn.configuration_space_T_cn import \
 + SpaceTCnConfig from 
modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger @@ -609,9 +610,9 @@ class PreTrainedBertModel(nn.Module): def __init__(self, config, *inputs, **kwargs): super(PreTrainedBertModel, self).__init__() - if not isinstance(config, Star3Config): + if not isinstance(config, SpaceTCnConfig): raise ValueError( - 'Parameter config in `{}(config)` should be an instance of class `Star3Config`. ' + 'Parameter config in `{}(config)` should be an instance of class `SpaceTCnConfig`. ' 'To create a model from a Google pretrained model use ' '`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`'.format( self.__class__.__name__, self.__class__.__name__)) @@ -676,7 +677,7 @@ class PreTrainedBertModel(nn.Module): serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) - config = Star3Config.from_json_file(config_file) + config = SpaceTCnConfig.from_json_file(config_file) logger.info('Model config {}'.format(config)) # Instantiate model. model = cls(config, *inputs, **kwargs) @@ -742,11 +743,11 @@ class PreTrainedBertModel(nn.Module): return model -class Star3Model(PreTrainedBertModel): - """Star3Model model ("Bidirectional Embedding Representations from a Transformer pretrained on STAR3.0"). +class SpaceTCnModel(PreTrainedBertModel): + """SpaceTCnModel model ("Bidirectional Embedding Representations from a Transformer pretrained on STAR-T-CN"). 
Params: - config: a Star3Config class instance with the configuration to build a new model + config: a SpaceTCnConfig class instance with the configuration to build a new model Inputs: `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] @@ -780,16 +781,16 @@ class Star3Model(PreTrainedBertModel): input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = modeling.Star3Config(vocab_size_or_config_json_file=32000, hidden_size=768, + config = modeling.SpaceTCnConfig(vocab_size_or_config_json_file=32000, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) - model = modeling.Star3Model(config=config) + model = modeling.SpaceTCnModel(config=config) all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) ``` """ def __init__(self, config, schema_link_module='none'): - super(Star3Model, self).__init__(config) + super(SpaceTCnModel, self).__init__(config) self.embeddings = BertEmbeddings(config) self.encoder = BertEncoder( config, schema_link_module=schema_link_module) diff --git a/modelscope/models/nlp/star_text_to_sql.py b/modelscope/models/nlp/star_text_to_sql.py index eef76e8a..089f1c89 100644 --- a/modelscope/models/nlp/star_text_to_sql.py +++ b/modelscope/models/nlp/star_text_to_sql.py @@ -20,7 +20,7 @@ __all__ = ['StarForTextToSql'] @MODELS.register_module( - Tasks.conversational_text_to_sql, module_name=Models.star) + Tasks.table_question_answering, module_name=Models.space_T_en) class StarForTextToSql(Model): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/table_question_answering.py b/modelscope/models/nlp/table_question_answering.py index c2134df2..8e05dd0f 100644 --- a/modelscope/models/nlp/table_question_answering.py +++ b/modelscope/models/nlp/table_question_answering.py @@ -3,27 +3,25 @@ import os from typing import Dict -import json import numpy import torch import 
torch.nn.functional as F -import tqdm from transformers import BertTokenizer from modelscope.metainfo import Models from modelscope.models.base import Model, Tensor from modelscope.models.builder import MODELS -from modelscope.models.nlp.star3.configuration_star3 import Star3Config -from modelscope.models.nlp.star3.modeling_star3 import Seq2SQL, Star3Model -from modelscope.preprocessors.star3.fields.struct import Constant +from modelscope.preprocessors.space_T_cn.fields.struct import Constant from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.device import verify_device +from .space_T_cn.configuration_space_T_cn import SpaceTCnConfig +from .space_T_cn.modeling_space_T_cn import Seq2SQL, SpaceTCnModel __all__ = ['TableQuestionAnswering'] @MODELS.register_module( - Tasks.table_question_answering, module_name=Models.star3) + Tasks.table_question_answering, module_name=Models.space_T_cn) class TableQuestionAnswering(Model): def __init__(self, model_dir: str, *args, **kwargs): @@ -43,9 +41,9 @@ class TableQuestionAnswering(Model): os.path.join(self.model_dir, ModelFile.TORCH_MODEL_BIN_FILE), map_location='cpu') - self.backbone_config = Star3Config.from_json_file( + self.backbone_config = SpaceTCnConfig.from_json_file( os.path.join(self.model_dir, ModelFile.CONFIGURATION)) - self.backbone_model = Star3Model( + self.backbone_model = SpaceTCnModel( config=self.backbone_config, schema_link_module='rat') self.backbone_model.load_state_dict(state_dict['backbone_model']) diff --git a/modelscope/outputs.py b/modelscope/outputs.py index 13d440ca..34bde76a 100644 --- a/modelscope/outputs.py +++ b/modelscope/outputs.py @@ -606,21 +606,12 @@ TASK_OUTPUTS = { # } Tasks.task_oriented_conversation: [OutputKeys.OUTPUT], - # conversational text-to-sql result for single sample - # { - # "text": "SELECT shop.Name FROM shop." 
- # } - Tasks.conversational_text_to_sql: [OutputKeys.TEXT], - # table-question-answering result for single sample # { # "sql": "SELECT shop.Name FROM shop." # "sql_history": {sel: 0, agg: 0, conds: [[0, 0, 'val']]} # } - Tasks.table_question_answering: [ - OutputKeys.SQL_STRING, OutputKeys.SQL_QUERY, OutputKeys.HISTORY, - OutputKeys.QUERT_RESULT - ], + Tasks.table_question_answering: [OutputKeys.OUTPUT], # ============ audio tasks =================== # asr result for single sample diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 8c81118c..e1583387 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -69,9 +69,6 @@ DEFAULT_MODEL_FOR_PIPELINE = { 'damo/nlp_space_dialog-modeling'), Tasks.dialog_state_tracking: (Pipelines.dialog_state_tracking, 'damo/nlp_space_dialog-state-tracking'), - Tasks.conversational_text_to_sql: - (Pipelines.conversational_text_to_sql, - 'damo/nlp_star_conversational-text-to-sql'), Tasks.table_question_answering: (Pipelines.table_question_answering_pipeline, 'damo/nlp-convai-text2sql-pretrain-cn'), diff --git a/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py b/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py index c46e8c81..73c6429d 100644 --- a/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py +++ b/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py @@ -19,7 +19,7 @@ __all__ = ['ConversationalTextToSqlPipeline'] @PIPELINES.register_module( - Tasks.conversational_text_to_sql, + Tasks.table_question_answering, module_name=Pipelines.conversational_text_to_sql) class ConversationalTextToSqlPipeline(Pipeline): @@ -62,7 +62,7 @@ class ConversationalTextToSqlPipeline(Pipeline): Dict[str, str]: the prediction results """ sql = Example.evaluator.obtain_sql(inputs['predict'][0], inputs['db']) - result = {OutputKeys.TEXT: sql} + result = {OutputKeys.OUTPUT: {OutputKeys.TEXT: sql}} return result def _collate_fn(self, data): diff 
--git a/modelscope/pipelines/nlp/table_question_answering_pipeline.py b/modelscope/pipelines/nlp/table_question_answering_pipeline.py index 08501953..52ba33e0 100644 --- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py +++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py @@ -13,8 +13,9 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.preprocessors import TableQuestionAnsweringPreprocessor -from modelscope.preprocessors.star3.fields.database import Database -from modelscope.preprocessors.star3.fields.struct import Constant, SQLQuery +from modelscope.preprocessors.space_T_cn.fields.database import Database +from modelscope.preprocessors.space_T_cn.fields.struct import (Constant, + SQLQuery) from modelscope.utils.constant import ModelFile, Tasks __all__ = ['TableQuestionAnsweringPipeline'] @@ -320,7 +321,7 @@ class TableQuestionAnsweringPipeline(Pipeline): OutputKeys.QUERT_RESULT: tabledata, } - return output + return {OutputKeys.OUTPUT: output} def _collate_fn(self, data): return data diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index 63302aa7..423b3f46 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -40,7 +40,7 @@ if TYPE_CHECKING: DialogStateTrackingPreprocessor) from .video import ReadVideoData, MovieSceneSegmentationPreprocessor from .star import ConversationalTextToSqlPreprocessor - from .star3 import TableQuestionAnsweringPreprocessor + from .space_T_cn import TableQuestionAnsweringPreprocessor else: _import_structure = { @@ -81,7 +81,7 @@ else: 'DialogStateTrackingPreprocessor', 'InputFeatures' ], 'star': ['ConversationalTextToSqlPreprocessor'], - 'star3': ['TableQuestionAnsweringPreprocessor'], + 'space_T_cn': ['TableQuestionAnsweringPreprocessor'], } import sys diff --git a/modelscope/preprocessors/star3/__init__.py 
b/modelscope/preprocessors/space_T_cn/__init__.py similarity index 100% rename from modelscope/preprocessors/star3/__init__.py rename to modelscope/preprocessors/space_T_cn/__init__.py diff --git a/modelscope/preprocessors/star3/fields/__init__.py b/modelscope/preprocessors/space_T_cn/fields/__init__.py similarity index 100% rename from modelscope/preprocessors/star3/fields/__init__.py rename to modelscope/preprocessors/space_T_cn/fields/__init__.py diff --git a/modelscope/preprocessors/star3/fields/database.py b/modelscope/preprocessors/space_T_cn/fields/database.py similarity index 98% rename from modelscope/preprocessors/star3/fields/database.py rename to modelscope/preprocessors/space_T_cn/fields/database.py index 5debfe2c..481bd1db 100644 --- a/modelscope/preprocessors/star3/fields/database.py +++ b/modelscope/preprocessors/space_T_cn/fields/database.py @@ -4,7 +4,7 @@ import sqlite3 import json import tqdm -from modelscope.preprocessors.star3.fields.struct import Trie +from modelscope.preprocessors.space_T_cn.fields.struct import Trie class Database: diff --git a/modelscope/preprocessors/star3/fields/schema_link.py b/modelscope/preprocessors/space_T_cn/fields/schema_link.py similarity index 99% rename from modelscope/preprocessors/star3/fields/schema_link.py rename to modelscope/preprocessors/space_T_cn/fields/schema_link.py index 220a71d8..4b8f9d31 100644 --- a/modelscope/preprocessors/star3/fields/schema_link.py +++ b/modelscope/preprocessors/space_T_cn/fields/schema_link.py @@ -1,7 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import re -from modelscope.preprocessors.star3.fields.struct import TypeInfo +from modelscope.preprocessors.space_T_cn.fields.struct import TypeInfo class SchemaLinker: diff --git a/modelscope/preprocessors/star3/fields/struct.py b/modelscope/preprocessors/space_T_cn/fields/struct.py similarity index 100% rename from modelscope/preprocessors/star3/fields/struct.py rename to modelscope/preprocessors/space_T_cn/fields/struct.py diff --git a/modelscope/preprocessors/star3/table_question_answering_preprocessor.py b/modelscope/preprocessors/space_T_cn/table_question_answering_preprocessor.py similarity index 96% rename from modelscope/preprocessors/star3/table_question_answering_preprocessor.py rename to modelscope/preprocessors/space_T_cn/table_question_answering_preprocessor.py index ed2911f6..63e6fd57 100644 --- a/modelscope/preprocessors/star3/table_question_answering_preprocessor.py +++ b/modelscope/preprocessors/space_T_cn/table_question_answering_preprocessor.py @@ -8,8 +8,8 @@ from transformers import BertTokenizer from modelscope.metainfo import Preprocessors from modelscope.preprocessors.base import Preprocessor from modelscope.preprocessors.builder import PREPROCESSORS -from modelscope.preprocessors.star3.fields.database import Database -from modelscope.preprocessors.star3.fields.schema_link import SchemaLinker +from modelscope.preprocessors.space_T_cn.fields.database import Database +from modelscope.preprocessors.space_T_cn.fields.schema_link import SchemaLinker from modelscope.utils.config import Config from modelscope.utils.constant import Fields, ModelFile from modelscope.utils.type_assert import type_assert diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 87a0a417..50a1c016 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -123,7 +123,6 @@ class NLPTasks(object): backbone = 'backbone' text_error_correction = 'text-error-correction' faq_question_answering = 'faq-question-answering' - 
conversational_text_to_sql = 'conversational-text-to-sql' information_extraction = 'information-extraction' document_segmentation = 'document-segmentation' feature_extraction = 'feature-extraction' diff --git a/modelscope/utils/nlp/nlp_utils.py b/modelscope/utils/nlp/nlp_utils.py index 35b374f2..bfeaf924 100644 --- a/modelscope/utils/nlp/nlp_utils.py +++ b/modelscope/utils/nlp/nlp_utils.py @@ -20,7 +20,7 @@ def text2sql_tracking_and_print_results( results = p(case) print({'question': item}) print(results) - last_sql = results['text'] + last_sql = results[OutputKeys.OUTPUT][OutputKeys.TEXT] history.append(item) diff --git a/tests/pipelines/test_conversational_text_to_sql.py b/tests/pipelines/test_conversational_text_to_sql.py index 80c72337..21a4e0ce 100644 --- a/tests/pipelines/test_conversational_text_to_sql.py +++ b/tests/pipelines/test_conversational_text_to_sql.py @@ -16,7 +16,7 @@ from modelscope.utils.test_utils import test_level class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck): def setUp(self) -> None: - self.task = Tasks.conversational_text_to_sql + self.task = Tasks.table_question_answering self.model_id = 'damo/nlp_star_conversational-text-to-sql' model_id = 'damo/nlp_star_conversational-text-to-sql' @@ -66,11 +66,6 @@ class ConversationalTextToSql(unittest.TestCase, DemoCompatibilityCheck): pipelines = [pipeline(task=self.task, model=self.model_id)] text2sql_tracking_and_print_results(self.test_case, pipelines) - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - def test_run_with_default_model(self): - pipelines = [pipeline(task=self.task)] - text2sql_tracking_and_print_results(self.test_case, pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_demo_compatibility(self): self.compatibility_check() diff --git a/tests/pipelines/test_table_question_answering.py b/tests/pipelines/test_table_question_answering.py index 571ca795..828ef5ac 100644 --- 
a/tests/pipelines/test_table_question_answering.py +++ b/tests/pipelines/test_table_question_answering.py @@ -12,7 +12,7 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import TableQuestionAnsweringPipeline from modelscope.preprocessors import TableQuestionAnsweringPreprocessor -from modelscope.preprocessors.star3.fields.database import Database +from modelscope.preprocessors.space_T_cn.fields.database import Database from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.test_utils import test_level @@ -38,7 +38,7 @@ def tableqa_tracking_and_print_results_with_history( output_dict = p({ 'question': question, 'history_sql': historical_queries - }) + })[OutputKeys.OUTPUT] print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) @@ -61,7 +61,7 @@ def tableqa_tracking_and_print_results_without_history( } for p in pipelines: for question in test_case['utterance']: - output_dict = p({'question': question}) + output_dict = p({'question': question})[OutputKeys.OUTPUT] print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) @@ -92,7 +92,7 @@ def tableqa_tracking_and_print_results_with_tableid( 'question': question, 'table_id': table_id, 'history_sql': historical_queries - }) + })[OutputKeys.OUTPUT] print('question', question) print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) @@ -147,11 +147,6 @@ class TableQuestionAnswering(unittest.TestCase): ] tableqa_tracking_and_print_results_with_tableid(pipelines) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_model_from_task(self): - pipelines = [pipeline(Tasks.table_question_answering, self.model_id)] - tableqa_tracking_and_print_results_with_history(pipelines) - 
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_model_from_modelhub_with_other_classes(self): model = Model.from_pretrained(self.model_id)