
[to #42322933] bugs: circular dependency fixed / word_segmentation output

Branch: master
Author: zhangzhicheng.zzc, 3 years ago
Parent commit: 937d3ca67b
9 changed files with 79 additions and 31 deletions
  1. modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py (+2, -2)
  2. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+1, -1)
  3. modelscope/preprocessors/nlp.py (+1, -1)
  4. modelscope/preprocessors/star/__init__.py (+2, -1)
  5. modelscope/preprocessors/star/fields/__init__.py (+30, -6)
  6. modelscope/utils/nlp/__init__.py (+22, -0)
  7. modelscope/utils/nlp/nlp_utils.py (+0, -19)
  8. modelscope/utils/nlp/utils.py (+20, -0)
  9. modelscope/utils/test_utils.py (+1, -1)

modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py (+2, -2)

@@ -11,8 +11,8 @@ from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import ConversationalTextToSqlPreprocessor
-from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor
-from modelscope.preprocessors.star.fields.process_dataset import process_tables
+from modelscope.preprocessors.star.fields import (SubPreprocessor,
+                                                  process_tables)
 from modelscope.utils.constant import Tasks
 
 __all__ = ['ConversationalTextToSqlPipeline']

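Importing SubPreprocessor and process_tables from the package rather than from its concrete submodules is what breaks the cycle: the fields package __init__ is now a lazy module (see the modelscope/preprocessors/star/fields/__init__.py hunk below), so the submodules are only resolved on first attribute access, after module initialization has finished. A minimal sketch of the same deferred-import idea using plain PEP 562 module-level __getattr__; the package layout and name mapping here are hypothetical, not modelscope's implementation:

# pkg/fields/__init__.py -- hypothetical package, not modelscope's code
import importlib

_lazy_names = {
    'SubPreprocessor': '.common_utils',
    'process_tables': '.process_dataset',
}

def __getattr__(name):
    # PEP 562: called only when `name` is not already defined here, so the
    # submodule import is deferred until first use instead of import time.
    if name in _lazy_names:
        submodule = importlib.import_module(_lazy_names[name], __package__)
        return getattr(submodule, name)
    raise AttributeError(name)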

modelscope/pipelines/nlp/word_segmentation_pipeline.py (+1, -1)

@@ -94,4 +94,4 @@ class WordSegmentationPipeline(Pipeline):
         if chunk:
             chunks.append(chunk)
         seg_result = ' '.join(chunks)
-        return {OutputKeys.OUTPUT: seg_result}
+        return {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}

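With this change the pipeline always returns both keys, presumably so callers can rely on a uniform output schema whether or not token labels are produced. A hypothetical usage sketch; the model id and the printed dict are illustrative assumptions, not taken from this commit:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Model id is an assumption for illustration only.
ws = pipeline(Tasks.word_segmentation,
              model='damo/nlp_structbert_word-segmentation_chinese-base')
print(ws('今天天气不错，适合出去游玩'))
# Expected shape after this change, e.g.:
# {'output': '今天 天气 不错 ， 适合 出去 游玩', 'labels': []}
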
modelscope/preprocessors/nlp.py (+1, -1)

@@ -16,7 +16,7 @@ from modelscope.utils.config import Config, ConfigFields
 from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile
 from modelscope.utils.hub import get_model_type, parse_label_mapping
 from modelscope.utils.logger import get_logger
-from modelscope.utils.nlp.nlp_utils import import_external_nltk_data
+from modelscope.utils.nlp import import_external_nltk_data
 from modelscope.utils.type_assert import type_assert
 from .base import Preprocessor
 from .builder import PREPROCESSORS


modelscope/preprocessors/star/__init__.py (+2, -1)

@@ -6,7 +6,8 @@ from modelscope.utils.import_utils import LazyImportModule
 if TYPE_CHECKING:
     from .conversational_text_to_sql_preprocessor import \
         ConversationalTextToSqlPreprocessor
-    from .fields import MultiWOZBPETextField, IntentBPETextField
+    from .fields import (get_label, SubPreprocessor, preprocess_dataset,
+                         process_dataset)
 
 else:
     _import_structure = {


modelscope/preprocessors/star/fields/__init__.py (+30, -6)

@@ -1,6 +1,30 @@
-from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor
-from modelscope.preprocessors.star.fields.parse import get_label
-from modelscope.preprocessors.star.fields.preprocess_dataset import \
-    preprocess_dataset
-from modelscope.preprocessors.star.fields.process_dataset import \
-    process_dataset
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .common_utils import SubPreprocessor
+    from .parse import get_label
+    from .preprocess_dataset import \
+        preprocess_dataset
+    from .process_dataset import \
+        process_dataset, process_tables
+
+else:
+    _import_structure = {
+        'common_utils': ['SubPreprocessor'],
+        'parse': ['get_label'],
+        'preprocess_dataset': ['preprocess_dataset'],
+        'process_dataset': ['process_dataset', 'process_tables'],
+    }
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )

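LazyImportModule itself is not part of this diff (it lives in modelscope.utils.import_utils). Below is a generic reduction of the pattern it implements, a ModuleType subclass that resolves exported names on first attribute access; this is a sketch of the idea, not modelscope's actual code:

import importlib
import types

class LazyModuleSketch(types.ModuleType):
    """Stand-in for the lazy-module idea; not modelscope's implementation."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Invert {submodule: [exported names]} to {exported name: submodule}.
        self._name_to_submodule = {
            attr: submodule
            for submodule, attrs in import_structure.items()
            for attr in attrs
        }

    def __getattr__(self, attr):
        # Called only when normal lookup fails, i.e. on first access.
        if attr not in self._name_to_submodule:
            raise AttributeError(attr)
        submodule = importlib.import_module(
            '{}.{}'.format(self.__name__, self._name_to_submodule[attr]))
        value = getattr(submodule, attr)
        setattr(self, attr, value)  # cache: later lookups bypass __getattr__
        return value
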
modelscope/utils/nlp/__init__.py (+22, -0)

@@ -0,0 +1,22 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .utils import import_external_nltk_data
+
+else:
+    _import_structure = {
+        'utils': ['import_external_nltk_data'],
+    }
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )

modelscope/utils/nlp/nlp_utils.py (+0, -19)

@@ -42,22 +42,3 @@ def tracking_and_print_dialog_states(
     print(json.dumps(result))
 
     history_states.extend([result[OutputKeys.OUTPUT], {}])
-
-
-def import_external_nltk_data(nltk_data_dir, package_name):
-    """import external nltk_data, and extract nltk zip package.
-
-    Args:
-        nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
-        package_name (str): nltk package name, eg. tokenizers/punkt
-    """
-    import nltk
-    nltk.data.path.append(nltk_data_dir)
-
-    filepath = osp.join(nltk_data_dir, package_name + '.zip')
-    zippath = osp.join(nltk_data_dir, package_name)
-    packagepath = osp.dirname(zippath)
-    if not osp.exists(zippath):
-        import zipfile
-        with zipfile.ZipFile(filepath) as zf:
-            zf.extractall(osp.join(packagepath))

modelscope/utils/nlp/utils.py (+20, -0)

@@ -0,0 +1,20 @@
+import os.path as osp
+
+
+def import_external_nltk_data(nltk_data_dir, package_name):
+    """import external nltk_data, and extract nltk zip package.
+
+    Args:
+        nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
+        package_name (str): nltk package name, eg. tokenizers/punkt
+    """
+    import nltk
+    nltk.data.path.append(nltk_data_dir)
+
+    filepath = osp.join(nltk_data_dir, package_name + '.zip')
+    zippath = osp.join(nltk_data_dir, package_name)
+    packagepath = osp.dirname(zippath)
+    if not osp.exists(zippath):
+        import zipfile
+        with zipfile.ZipFile(filepath) as zf:
+            zf.extractall(osp.join(packagepath))

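A usage sketch for the relocated helper; the paths mirror the docstring's own examples and are assumptions, not taken from this commit:

from modelscope.utils.nlp import import_external_nltk_data

# Appends /home/xx/nltk_data to nltk's search path and, if the extracted
# tokenizers/punkt directory is missing, unzips tokenizers/punkt.zip in place.
import_external_nltk_data('/home/xx/nltk_data', 'tokenizers/punkt')
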
modelscope/utils/test_utils.py (+1, -1)

@@ -11,7 +11,7 @@ import sys
 import tarfile
 import tempfile
 import unittest
-from typing import OrderedDict
+from collections import OrderedDict
 
 import requests
 import torch

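typing.OrderedDict only exists on Python >= 3.7.2 and is primarily an annotation alias such as OrderedDict[str, int]; the runtime class lives in collections, so the new import also works on older interpreters. A quick illustration:

from collections import OrderedDict

d = OrderedDict(a=1, b=2)
d.move_to_end('a')   # reorder in place; an OrderedDict-specific method
print(list(d))       # ['b', 'a']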
