Browse Source

[to #42322933] bugs: fix circular dependency / word_segmentation output

master
zhangzhicheng.zzc 3 years ago
parent
commit
937d3ca67b
9 changed files with 79 additions and 31 deletions
  1. +2
    -2
      modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py
  2. +1
    -1
      modelscope/pipelines/nlp/word_segmentation_pipeline.py
  3. +1
    -1
      modelscope/preprocessors/nlp.py
  4. +2
    -1
      modelscope/preprocessors/star/__init__.py
  5. +30
    -6
      modelscope/preprocessors/star/fields/__init__.py
  6. +22
    -0
      modelscope/utils/nlp/__init__.py
  7. +0
    -19
      modelscope/utils/nlp/nlp_utils.py
  8. +20
    -0
      modelscope/utils/nlp/utils.py
  9. +1
    -1
      modelscope/utils/test_utils.py

+ 2
- 2
modelscope/pipelines/nlp/conversational_text_to_sql_pipeline.py View File

@@ -11,8 +11,8 @@ from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import ConversationalTextToSqlPreprocessor from modelscope.preprocessors import ConversationalTextToSqlPreprocessor
from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor
from modelscope.preprocessors.star.fields.process_dataset import process_tables
from modelscope.preprocessors.star.fields import (SubPreprocessor,
process_tables)
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks


__all__ = ['ConversationalTextToSqlPipeline'] __all__ = ['ConversationalTextToSqlPipeline']


+ 1
- 1
modelscope/pipelines/nlp/word_segmentation_pipeline.py View File

@@ -94,4 +94,4 @@ class WordSegmentationPipeline(Pipeline):
if chunk: if chunk:
chunks.append(chunk) chunks.append(chunk)
seg_result = ' '.join(chunks) seg_result = ' '.join(chunks)
return {OutputKeys.OUTPUT: seg_result}
return {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}

+ 1
- 1
modelscope/preprocessors/nlp.py View File

@@ -16,7 +16,7 @@ from modelscope.utils.config import Config, ConfigFields
from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile
from modelscope.utils.hub import get_model_type, parse_label_mapping from modelscope.utils.hub import get_model_type, parse_label_mapping
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from modelscope.utils.nlp.nlp_utils import import_external_nltk_data
from modelscope.utils.nlp import import_external_nltk_data
from modelscope.utils.type_assert import type_assert from modelscope.utils.type_assert import type_assert
from .base import Preprocessor from .base import Preprocessor
from .builder import PREPROCESSORS from .builder import PREPROCESSORS


+ 2
- 1
modelscope/preprocessors/star/__init__.py View File

@@ -6,7 +6,8 @@ from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING: if TYPE_CHECKING:
from .conversational_text_to_sql_preprocessor import \ from .conversational_text_to_sql_preprocessor import \
ConversationalTextToSqlPreprocessor ConversationalTextToSqlPreprocessor
from .fields import MultiWOZBPETextField, IntentBPETextField
from .fields import (get_label, SubPreprocessor, preprocess_dataset,
process_dataset)


else: else:
_import_structure = { _import_structure = {


+ 30
- 6
modelscope/preprocessors/star/fields/__init__.py View File

@@ -1,6 +1,30 @@
from modelscope.preprocessors.star.fields.common_utils import SubPreprocessor
from modelscope.preprocessors.star.fields.parse import get_label
from modelscope.preprocessors.star.fields.preprocess_dataset import \
preprocess_dataset
from modelscope.preprocessors.star.fields.process_dataset import \
process_dataset
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Static type checkers see the real imports so the public names resolve.
    from .common_utils import SubPreprocessor
    from .parse import get_label
    from .preprocess_dataset import \
        preprocess_dataset
    from .process_dataset import \
        process_dataset, process_tables

else:
    # Maps submodule name -> public names it provides; imports happen lazily
    # on first attribute access (defers heavy deps and, per the commit
    # message, helps break a circular-import chain — NOTE(review): verify).
    _import_structure = {
        'common_utils': ['SubPreprocessor'],
        'parse': ['get_label'],
        'preprocess_dataset': ['preprocess_dataset'],
        'process_dataset': ['process_dataset', 'process_tables'],
    }

    import sys

    # Replace this module object with a lazy proxy that resolves names
    # from _import_structure on demand.
    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )

+ 22
- 0
modelscope/utils/nlp/__init__.py View File

@@ -0,0 +1,22 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Static type checkers see the real import so the public name resolves.
    from .utils import import_external_nltk_data

else:
    # Maps submodule name -> public names it provides; the actual import is
    # deferred until first attribute access via LazyImportModule.
    _import_structure = {
        'utils': ['import_external_nltk_data'],
    }

    import sys

    # Replace this module object with a lazy proxy that resolves names
    # from _import_structure on demand.
    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )

+ 0
- 19
modelscope/utils/nlp/nlp_utils.py View File

@@ -42,22 +42,3 @@ def tracking_and_print_dialog_states(
print(json.dumps(result)) print(json.dumps(result))


history_states.extend([result[OutputKeys.OUTPUT], {}]) history_states.extend([result[OutputKeys.OUTPUT], {}])


def import_external_nltk_data(nltk_data_dir, package_name):
"""import external nltk_data, and extract nltk zip package.

Args:
nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
package_name (str): nltk package name, eg. tokenizers/punkt
"""
import nltk
nltk.data.path.append(nltk_data_dir)

filepath = osp.join(nltk_data_dir, package_name + '.zip')
zippath = osp.join(nltk_data_dir, package_name)
packagepath = osp.dirname(zippath)
if not osp.exists(zippath):
import zipfile
with zipfile.ZipFile(filepath) as zf:
zf.extractall(osp.join(packagepath))

+ 20
- 0
modelscope/utils/nlp/utils.py View File

@@ -0,0 +1,20 @@
import os.path as osp


def import_external_nltk_data(nltk_data_dir, package_name):
    """Register an external nltk_data directory and unpack a zipped package.

    Appends ``nltk_data_dir`` to ``nltk.data.path`` so NLTK can find
    resources there, then extracts ``<package_name>.zip`` alongside it
    when the unzipped package directory is not already present.

    Args:
        nltk_data_dir (str): external nltk_data dir path, e.g. /home/xx/nltk_data
        package_name (str): nltk package name, e.g. tokenizers/punkt
    """
    import nltk
    nltk.data.path.append(nltk_data_dir)

    extracted_dir = osp.join(nltk_data_dir, package_name)
    if osp.exists(extracted_dir):
        # Already unpacked on a previous run; nothing to do.
        return

    import zipfile
    archive_path = extracted_dir + '.zip'
    with zipfile.ZipFile(archive_path) as zf:
        # Extract into the parent directory so the archive's internal
        # layout (e.g. tokenizers/punkt) lands at extracted_dir.
        zf.extractall(osp.dirname(extracted_dir))

+ 1
- 1
modelscope/utils/test_utils.py View File

@@ -11,7 +11,7 @@ import sys
import tarfile import tarfile
import tempfile import tempfile
import unittest import unittest
from typing import OrderedDict
from collections import OrderedDict


import requests import requests
import torch import torch


Loading…
Cancel
Save