| @@ -11,8 +11,8 @@ from modelscope.outputs import OutputKeys | |||||
| from modelscope.pipelines.base import Pipeline | from modelscope.pipelines.base import Pipeline | ||||
| from modelscope.pipelines.builder import PIPELINES | from modelscope.pipelines.builder import PIPELINES | ||||
| from modelscope.preprocessors import ConversationalTextToSqlPreprocessor | from modelscope.preprocessors import ConversationalTextToSqlPreprocessor | ||||
| from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor | |||||
| from modelscope.preprocessors.star.fields.process_dataset import process_tables | |||||
| from modelscope.preprocessors.star.fields import (SubPreprocessor, | |||||
| process_tables) | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| __all__ = ['ConversationalTextToSqlPipeline'] | __all__ = ['ConversationalTextToSqlPipeline'] | ||||
| @@ -94,4 +94,4 @@ class WordSegmentationPipeline(Pipeline): | |||||
| if chunk: | if chunk: | ||||
| chunks.append(chunk) | chunks.append(chunk) | ||||
| seg_result = ' '.join(chunks) | seg_result = ' '.join(chunks) | ||||
| return {OutputKeys.OUTPUT: seg_result} | |||||
| return {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []} | |||||
| @@ -16,7 +16,7 @@ from modelscope.utils.config import Config, ConfigFields | |||||
| from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile | from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile | ||||
| from modelscope.utils.hub import get_model_type, parse_label_mapping | from modelscope.utils.hub import get_model_type, parse_label_mapping | ||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from modelscope.utils.nlp.nlp_utils import import_external_nltk_data | |||||
| from modelscope.utils.nlp import import_external_nltk_data | |||||
| from modelscope.utils.type_assert import type_assert | from modelscope.utils.type_assert import type_assert | ||||
| from .base import Preprocessor | from .base import Preprocessor | ||||
| from .builder import PREPROCESSORS | from .builder import PREPROCESSORS | ||||
| @@ -6,7 +6,8 @@ from modelscope.utils.import_utils import LazyImportModule | |||||
| if TYPE_CHECKING: | if TYPE_CHECKING: | ||||
| from .conversational_text_to_sql_preprocessor import \ | from .conversational_text_to_sql_preprocessor import \ | ||||
| ConversationalTextToSqlPreprocessor | ConversationalTextToSqlPreprocessor | ||||
| from .fields import MultiWOZBPETextField, IntentBPETextField | |||||
| from .fields import (get_label, SubPreprocessor, preprocess_dataset, | |||||
| process_dataset) | |||||
| else: | else: | ||||
| _import_structure = { | _import_structure = { | ||||
| @@ -1,6 +1,30 @@ | |||||
| from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor | |||||
| from modelscope.preprocessors.star.fields.parse import get_label | |||||
| from modelscope.preprocessors.star.fields.preprocess_dataset import \ | |||||
| preprocess_dataset | |||||
| from modelscope.preprocessors.star.fields.process_dataset import \ | |||||
| process_dataset | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||||
| from typing import TYPE_CHECKING | |||||
| from modelscope.utils.import_utils import LazyImportModule | |||||
| if TYPE_CHECKING: | |||||
| from .common_utils import SubPreprocessor | |||||
| from .parse import get_label | |||||
| from .preprocess_dataset import \ | |||||
| preprocess_dataset | |||||
| from .process_dataset import \ | |||||
| process_dataset, process_tables | |||||
| else: | |||||
| _import_structure = { | |||||
| 'common_utils': ['SubPreprocessor'], | |||||
| 'parse': ['get_label'], | |||||
| 'preprocess_dataset': ['preprocess_dataset'], | |||||
| 'process_dataset': ['process_dataset', 'process_tables'], | |||||
| } | |||||
| import sys | |||||
| sys.modules[__name__] = LazyImportModule( | |||||
| __name__, | |||||
| globals()['__file__'], | |||||
| _import_structure, | |||||
| module_spec=__spec__, | |||||
| extra_objects={}, | |||||
| ) | |||||
| @@ -0,0 +1,22 @@ | |||||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||||
| from typing import TYPE_CHECKING | |||||
| from modelscope.utils.import_utils import LazyImportModule | |||||
| if TYPE_CHECKING: | |||||
| from .utils import import_external_nltk_data | |||||
| else: | |||||
| _import_structure = { | |||||
| 'utils': ['import_external_nltk_data'], | |||||
| } | |||||
| import sys | |||||
| sys.modules[__name__] = LazyImportModule( | |||||
| __name__, | |||||
| globals()['__file__'], | |||||
| _import_structure, | |||||
| module_spec=__spec__, | |||||
| extra_objects={}, | |||||
| ) | |||||
| @@ -42,22 +42,3 @@ def tracking_and_print_dialog_states( | |||||
| print(json.dumps(result)) | print(json.dumps(result)) | ||||
| history_states.extend([result[OutputKeys.OUTPUT], {}]) | history_states.extend([result[OutputKeys.OUTPUT], {}]) | ||||
def import_external_nltk_data(nltk_data_dir, package_name):
    """Register an external nltk_data directory and unpack one nltk package.

    ``nltk_data_dir`` is added to ``nltk.data.path``. If
    ``<nltk_data_dir>/<package_name>`` does not exist yet, the archive
    ``<nltk_data_dir>/<package_name>.zip`` is extracted into the parent
    directory of that path.

    Args:
        nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
        package_name (str): nltk package name, eg. tokenizers/punkt

    Raises:
        FileNotFoundError: if the package has not been extracted and the
            zip archive is missing.
    """
    import nltk

    # Repeated calls must not pile up duplicate search-path entries.
    if nltk_data_dir not in nltk.data.path:
        nltk.data.path.append(nltk_data_dir)

    archive_path = osp.join(nltk_data_dir, package_name + '.zip')
    extracted_path = osp.join(nltk_data_dir, package_name)
    if osp.exists(extracted_path):
        # Already unpacked on a previous call; nothing left to do.
        return

    import zipfile
    # NOTE(review): presumably the archive holds a top-level folder named
    # after the package, which is why it is unpacked into the parent dir.
    with zipfile.ZipFile(archive_path) as zf:
        zf.extractall(osp.dirname(extracted_path))
| @@ -0,0 +1,20 @@ | |||||
| import os.path as osp | |||||
def import_external_nltk_data(nltk_data_dir, package_name):
    """Register an external nltk_data directory and unpack one nltk package.

    ``nltk_data_dir`` is added to ``nltk.data.path``. If
    ``<nltk_data_dir>/<package_name>`` does not exist yet, the archive
    ``<nltk_data_dir>/<package_name>.zip`` is extracted into the parent
    directory of that path.

    Args:
        nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
        package_name (str): nltk package name, eg. tokenizers/punkt

    Raises:
        FileNotFoundError: if the package has not been extracted and the
            zip archive is missing.
    """
    import nltk

    # Repeated calls must not pile up duplicate search-path entries.
    if nltk_data_dir not in nltk.data.path:
        nltk.data.path.append(nltk_data_dir)

    archive_path = osp.join(nltk_data_dir, package_name + '.zip')
    extracted_path = osp.join(nltk_data_dir, package_name)
    if osp.exists(extracted_path):
        # Already unpacked on a previous call; nothing left to do.
        return

    import zipfile
    # NOTE(review): presumably the archive holds a top-level folder named
    # after the package, which is why it is unpacked into the parent dir.
    with zipfile.ZipFile(archive_path) as zf:
        zf.extractall(osp.dirname(extracted_path))
| @@ -11,7 +11,7 @@ import sys | |||||
| import tarfile | import tarfile | ||||
| import tempfile | import tempfile | ||||
| import unittest | import unittest | ||||
| from typing import OrderedDict | |||||
| from collections import OrderedDict | |||||
| import requests | import requests | ||||
| import torch | import torch | ||||