
[to #42322933] bugs: circular dependency fixed / word_segmentation output

Branch: master
Author: zhangzhicheng.zzc, 3 years ago
Parent commit: 937d3ca67b
9 changed files with 79 additions and 31 deletions
  1. modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py (+2, -2)
  2. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+1, -1)
  3. modelscope/preprocessors/nlp.py (+1, -1)
  4. modelscope/preprocessors/star/__init__.py (+2, -1)
  5. modelscope/preprocessors/star/fields/__init__.py (+30, -6)
  6. modelscope/utils/nlp/__init__.py (+22, -0)
  7. modelscope/utils/nlp/nlp_utils.py (+0, -19)
  8. modelscope/utils/nlp/utils.py (+20, -0)
  9. modelscope/utils/test_utils.py (+1, -1)

modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py (+2, -2)

@@ -11,8 +11,8 @@ from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import ConversationalTextToSqlPreprocessor
-from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor
-from modelscope.preprocessors.star.fields.process_dataset import process_tables
+from modelscope.preprocessors.star.fields import (SubPreprocessor,
+                                                  process_tables)
 from modelscope.utils.constant import Tasks
 
 __all__ = ['ConversationalTextToSqlPipeline']

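Importing SubPreprocessor and process_tables from the package rather than from its concrete submodules is what breaks the cycle: the fields package __init__ is now a lazy module (see the modelscope/preprocessors/star/fields/__init__.py hunk below), so the submodules are only resolved on first attribute access, after module initialization has finished. A minimal sketch of the same deferred-import idea using plain PEP 562 module-level __getattr__; the package layout and name mapping here are hypothetical, not modelscope's implementation:

# pkg/fields/__init__.py -- hypothetical package, not modelscope's code
import importlib

_lazy_names = {
    'SubPreprocessor': '.common_utils',
    'process_tables': '.process_dataset',
}

def __getattr__(name):
    # PEP 562: called only when `name` is not already defined here, so the
    # submodule import is deferred until first use instead of import time.
    if name in _lazy_names:
        submodule = importlib.import_module(_lazy_names[name], __package__)
        return getattr(submodule, name)
    raise AttributeError(name)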

modelscope/pipelines/nlp/word_segmentation_pipeline.py (+1, -1)

@@ -94,4 +94,4 @@ class WordSegmentationPipeline(Pipeline):
         if chunk:
             chunks.append(chunk)
         seg_result = ' '.join(chunks)
-        return {OutputKeys.OUTPUT: seg_result}
+        return {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}

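With this change the pipeline always returns both keys, presumably so callers can rely on a uniform output schema whether or not token labels are produced. A hypothetical usage sketch; the model id and the printed dict are illustrative assumptions, not taken from this commit:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Model id is an assumption for illustration only.
ws = pipeline(Tasks.word_segmentation,
              model='damo/nlp_structbert_word-segmentation_chinese-base')
print(ws('今天天气不错，适合出去游玩'))
# Expected shape after this change, e.g.:
# {'output': '今天 天气 不错 ， 适合 出去 游玩', 'labels': []}
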
modelscope/preprocessors/nlp.py (+1, -1)

@@ -16,7 +16,7 @@ from modelscope.utils.config import Config, ConfigFields
 from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile
 from modelscope.utils.hub import get_model_type, parse_label_mapping
 from modelscope.utils.logger import get_logger
-from modelscope.utils.nlp.nlp_utils import import_external_nltk_data
+from modelscope.utils.nlp import import_external_nltk_data
 from modelscope.utils.type_assert import type_assert
 from .base import Preprocessor
 from .builder import PREPROCESSORS


modelscope/preprocessors/star/__init__.py (+2, -1)

@@ -6,7 +6,8 @@ from modelscope.utils.import_utils import LazyImportModule
 if TYPE_CHECKING:
     from .conversational_text_to_sql_preprocessor import \
         ConversationalTextToSqlPreprocessor
-    from .fields import MultiWOZBPETextField, IntentBPETextField
+    from .fields import (get_label, SubPreprocessor, preprocess_dataset,
+                         process_dataset)
 
 else:
     _import_structure = {


modelscope/preprocessors/star/fields/__init__.py (+30, -6)

@@ -1,6 +1,30 @@
-from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor
-from modelscope.preprocessors.star.fields.parse import get_label
-from modelscope.preprocessors.star.fields.preprocess_dataset import \
-    preprocess_dataset
-from modelscope.preprocessors.star.fields.process_dataset import \
-    process_dataset
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .common_utils import SubPreprocessor
+    from .parse import get_label
+    from .preprocess_dataset import \
+        preprocess_dataset
+    from .process_dataset import \
+        process_dataset, process_tables
+
+else:
+    _import_structure = {
+        'common_utils': ['SubPreprocessor'],
+        'parse': ['get_label'],
+        'preprocess_dataset': ['preprocess_dataset'],
+        'process_dataset': ['process_dataset', 'process_tables'],
+    }
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )

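LazyImportModule itself is not part of this diff (it lives in modelscope.utils.import_utils). Below is a generic reduction of the pattern it implements, a ModuleType subclass that resolves exported names on first attribute access; this is a sketch of the idea, not modelscope's actual code:

import importlib
import types

class LazyModuleSketch(types.ModuleType):
    """Stand-in for the lazy-module idea; not modelscope's implementation."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Invert {submodule: [exported names]} to {exported name: submodule}.
        self._name_to_submodule = {
            attr: submodule
            for submodule, attrs in import_structure.items()
            for attr in attrs
        }

    def __getattr__(self, attr):
        # Called only when normal lookup fails, i.e. on first access.
        if attr not in self._name_to_submodule:
            raise AttributeError(attr)
        submodule = importlib.import_module(
            '{}.{}'.format(self.__name__, self._name_to_submodule[attr]))
        value = getattr(submodule, attr)
        setattr(self, attr, value)  # cache: later lookups bypass __getattr__
        return value
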
modelscope/utils/nlp/__init__.py (+22, -0)

@@ -0,0 +1,22 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .utils import import_external_nltk_data
+
+else:
+    _import_structure = {
+        'utils': ['import_external_nltk_data'],
+    }
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )

modelscope/utils/nlp/nlp_utils.py (+0, -19)

@@ -42,22 +42,3 @@ def tracking_and_print_dialog_states(
     print(json.dumps(result))
 
     history_states.extend([result[OutputKeys.OUTPUT], {}])
-
-
-def import_external_nltk_data(nltk_data_dir, package_name):
-    """import external nltk_data, and extract nltk zip package.
-
-    Args:
-        nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
-        package_name (str): nltk package name, eg. tokenizers/punkt
-    """
-    import nltk
-    nltk.data.path.append(nltk_data_dir)
-
-    filepath = osp.join(nltk_data_dir, package_name + '.zip')
-    zippath = osp.join(nltk_data_dir, package_name)
-    packagepath = osp.dirname(zippath)
-    if not osp.exists(zippath):
-        import zipfile
-        with zipfile.ZipFile(filepath) as zf:
-            zf.extractall(osp.join(packagepath))

modelscope/utils/nlp/utils.py (+20, -0)

@@ -0,0 +1,20 @@
+import os.path as osp
+
+
+def import_external_nltk_data(nltk_data_dir, package_name):
+    """import external nltk_data, and extract nltk zip package.
+
+    Args:
+        nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
+        package_name (str): nltk package name, eg. tokenizers/punkt
+    """
+    import nltk
+    nltk.data.path.append(nltk_data_dir)
+
+    filepath = osp.join(nltk_data_dir, package_name + '.zip')
+    zippath = osp.join(nltk_data_dir, package_name)
+    packagepath = osp.dirname(zippath)
+    if not osp.exists(zippath):
+        import zipfile
+        with zipfile.ZipFile(filepath) as zf:
+            zf.extractall(osp.join(packagepath))

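A usage sketch for the relocated helper; the paths mirror the docstring's own examples and are assumptions, not taken from this commit:

from modelscope.utils.nlp import import_external_nltk_data

# Appends /home/xx/nltk_data to nltk's search path and, if the extracted
# tokenizers/punkt directory is missing, unzips tokenizers/punkt.zip in place.
import_external_nltk_data('/home/xx/nltk_data', 'tokenizers/punkt')
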
modelscope/utils/test_utils.py (+1, -1)

@@ -11,7 +11,7 @@ import sys
 import tarfile
 import tempfile
 import unittest
-from typing import OrderedDict
+from collections import OrderedDict
 
 import requests
 import torch

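typing.OrderedDict only exists on Python >= 3.7.2 and is primarily an annotation alias such as OrderedDict[str, int]; the runtime class lives in collections, so the new import also works on older interpreters. A quick illustration:

from collections import OrderedDict

d = OrderedDict(a=1, b=2)
d.move_to_end('a')   # reorder in place; an OrderedDict-specific method
print(list(d))       # ['b', 'a']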
