diff --git a/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py b/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py index 399dad5a..c46e8c81 100644 --- a/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py +++ b/modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py @@ -11,8 +11,8 @@ from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.preprocessors import ConversationalTextToSqlPreprocessor -from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor -from modelscope.preprocessors.star.fields.process_dataset import process_tables +from modelscope.preprocessors.star.fields import (SubPreprocessor, + process_tables) from modelscope.utils.constant import Tasks __all__ = ['ConversationalTextToSqlPipeline'] diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py index 66a5c524..9899243e 100644 --- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py +++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py @@ -94,4 +94,4 @@ class WordSegmentationPipeline(Pipeline): if chunk: chunks.append(chunk) seg_result = ' '.join(chunks) - return {OutputKeys.OUTPUT: seg_result} + return {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []} diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 84e7ca4d..9137b105 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -16,7 +16,7 @@ from modelscope.utils.config import Config, ConfigFields from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile from modelscope.utils.hub import get_model_type, parse_label_mapping from modelscope.utils.logger import get_logger -from modelscope.utils.nlp.nlp_utils import import_external_nltk_data +from modelscope.utils.nlp import import_external_nltk_data from modelscope.utils.type_assert import type_assert from .base import Preprocessor from .builder import PREPROCESSORS diff --git a/modelscope/preprocessors/star/__init__.py b/modelscope/preprocessors/star/__init__.py index 5a4bcea9..cef8f074 100644 --- a/modelscope/preprocessors/star/__init__.py +++ b/modelscope/preprocessors/star/__init__.py @@ -6,7 +6,8 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .conversational_text_to_sql_preprocessor import \ ConversationalTextToSqlPreprocessor - from .fields import MultiWOZBPETextField, IntentBPETextField + from .fields import (get_label, SubPreprocessor, preprocess_dataset, + process_dataset) else: _import_structure = { diff --git a/modelscope/preprocessors/star/fields/__init__.py b/modelscope/preprocessors/star/fields/__init__.py index 1e95a998..7049c43b 100644 --- a/modelscope/preprocessors/star/fields/__init__.py +++ b/modelscope/preprocessors/star/fields/__init__.py @@ -1,6 +1,30 @@ -from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor -from modelscope.preprocessors.star.fields.parse import get_label -from modelscope.preprocessors.star.fields.preprocess_dataset import \ - preprocess_dataset -from modelscope.preprocessors.star.fields.process_dataset import \ - process_dataset +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .common_utils import SubPreprocessor + from .parse import get_label + from .preprocess_dataset import \ + preprocess_dataset + from .process_dataset import \ + process_dataset, process_tables + +else: + _import_structure = { + 'common_utils': ['SubPreprocessor'], + 'parse': ['get_label'], + 'preprocess_dataset': ['preprocess_dataset'], + 'process_dataset': ['process_dataset', 'process_tables'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/utils/nlp/__init__.py b/modelscope/utils/nlp/__init__.py index e69de29b..62c0b888 100644 --- a/modelscope/utils/nlp/__init__.py +++ b/modelscope/utils/nlp/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .utils import import_external_nltk_data + +else: + _import_structure = { + 'utils': ['import_external_nltk_data'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/utils/nlp/nlp_utils.py b/modelscope/utils/nlp/nlp_utils.py index 64b12007..af539dda 100644 --- a/modelscope/utils/nlp/nlp_utils.py +++ b/modelscope/utils/nlp/nlp_utils.py @@ -42,22 +42,3 @@ def tracking_and_print_dialog_states( print(json.dumps(result)) history_states.extend([result[OutputKeys.OUTPUT], {}]) - - -def import_external_nltk_data(nltk_data_dir, package_name): - """import external nltk_data, and extract nltk zip package. - - Args: - nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data - package_name (str): nltk package name, eg. tokenizers/punkt - """ - import nltk - nltk.data.path.append(nltk_data_dir) - - filepath = osp.join(nltk_data_dir, package_name + '.zip') - zippath = osp.join(nltk_data_dir, package_name) - packagepath = osp.dirname(zippath) - if not osp.exists(zippath): - import zipfile - with zipfile.ZipFile(filepath) as zf: - zf.extractall(osp.join(packagepath)) diff --git a/modelscope/utils/nlp/utils.py b/modelscope/utils/nlp/utils.py new file mode 100644 index 00000000..13a21480 --- /dev/null +++ b/modelscope/utils/nlp/utils.py @@ -0,0 +1,20 @@ +import os.path as osp + + +def import_external_nltk_data(nltk_data_dir, package_name): + """import external nltk_data, and extract nltk zip package. + + Args: + nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data + package_name (str): nltk package name, eg. tokenizers/punkt + """ + import nltk + nltk.data.path.append(nltk_data_dir) + + filepath = osp.join(nltk_data_dir, package_name + '.zip') + zippath = osp.join(nltk_data_dir, package_name) + packagepath = osp.dirname(zippath) + if not osp.exists(zippath): + import zipfile + with zipfile.ZipFile(filepath) as zf: + zf.extractall(osp.join(packagepath)) diff --git a/modelscope/utils/test_utils.py b/modelscope/utils/test_utils.py index 8fb621d3..5109db11 100644 --- a/modelscope/utils/test_utils.py +++ b/modelscope/utils/test_utils.py @@ -11,7 +11,7 @@ import sys import tarfile import tempfile import unittest -from typing import OrderedDict +from collections import OrderedDict import requests import torch