Browse Source

[to #42322933] add onnx model and onnx constant

master
Yingda Chen 3 years ago
parent
commit
f53b242332
26 changed files with 43 additions and 35 deletions
  1. +2
    -2
      modelscope/models/multi_modal/clip/clip_model.py
  2. +1
    -1
      modelscope/models/multi_modal/diffusion/model.py
  3. +1
    -2
      modelscope/models/multi_modal/mplug/__init__.py
  4. +4
    -4
      modelscope/models/multi_modal/mplug/modeling_mplug.py
  5. +3
    -1
      modelscope/models/multi_modal/ofa/tokenization_ofa.py
  6. +2
    -1
      modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py
  7. +2
    -1
      modelscope/models/nlp/structbert/tokenization_sbert.py
  8. +2
    -1
      modelscope/models/nlp/structbert/tokenization_sbert_fast.py
  9. +3
    -3
      modelscope/preprocessors/multi_modal.py
  10. +4
    -4
      modelscope/preprocessors/nlp.py
  11. +1
    -1
      modelscope/preprocessors/ofa/base.py
  12. +1
    -1
      modelscope/preprocessors/ofa/image_captioning.py
  13. +1
    -1
      modelscope/preprocessors/ofa/image_classification.py
  14. +1
    -1
      modelscope/preprocessors/ofa/summarization.py
  15. +1
    -1
      modelscope/preprocessors/ofa/text_classification.py
  16. +1
    -1
      modelscope/preprocessors/ofa/text_to_image_synthesis.py
  17. +1
    -1
      modelscope/preprocessors/ofa/visual_entailment.py
  18. +1
    -1
      modelscope/preprocessors/ofa/visual_grounding.py
  19. +1
    -1
      modelscope/preprocessors/ofa/visual_question_answering.py
  20. +1
    -1
      modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
  21. +1
    -1
      modelscope/preprocessors/space/dialog_modeling_preprocessor.py
  22. +1
    -1
      modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py
  23. +2
    -1
      modelscope/preprocessors/space/fields/gen_field.py
  24. +2
    -1
      modelscope/preprocessors/space/fields/intent_field.py
  25. +1
    -1
      modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py
  26. +2
    -0
      modelscope/utils/constant.py

+ 2
- 2
modelscope/models/multi_modal/clip/clip_model.py View File

@@ -17,7 +17,7 @@ from modelscope.models import TorchModel
from modelscope.models.builder import MODELS
from modelscope.models.multi_modal.clip.clip_bert import TextTransformer
from modelscope.models.multi_modal.clip.clip_vit import VisionTransformer
from modelscope.utils.constant import ModeKeys, Tasks
from modelscope.utils.constant import ModeKeys, ModelFile, Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()
@@ -143,7 +143,7 @@ class CLIPForMultiModalEmbedding(TorchModel):
])

# text tokenizer
vocab_path = '{}/vocab.txt'.format(model_dir)
vocab_path = f'{model_dir}/{ModelFile.VOCAB_FILE}'
self.text_tokenizer = BertWordPieceTokenizer(
vocab_path, lowercase=False)
self.text_tokenizer.enable_truncation(max_length=30)


+ 1
- 1
modelscope/models/multi_modal/diffusion/model.py View File

@@ -136,7 +136,7 @@ class DiffusionForTextToImageSynthesis(Model):
self.unet_upsampler_1024 = diffusion_model.unet_upsampler_1024

# text tokenizer
vocab_path = '{}/vocab.txt'.format(model_dir)
vocab_path = f'{model_dir}/{ModelFile.VOCAB_FILE}'
self.tokenizer = Tokenizer(vocab_file=vocab_path, seq_len=64)

# diffusion process


+ 1
- 2
modelscope/models/multi_modal/mplug/__init__.py View File

@@ -14,5 +14,4 @@
# limitations under the License.

from .configuration_mplug import MPlugConfig
from .modeling_mplug import (CONFIG_NAME, VOCAB_NAME,
MPlugForVisualQuestionAnswering)
from .modeling_mplug import CONFIG_NAME, MPlugForVisualQuestionAnswering

+ 4
- 4
modelscope/models/multi_modal/mplug/modeling_mplug.py View File

@@ -42,14 +42,13 @@ from transformers.utils import logging

from modelscope.models.multi_modal.mplug.configuration_mplug import MPlugConfig
from modelscope.models.multi_modal.mplug.predictor import TextGenerator
from modelscope.utils.constant import ModelFile

transformers.logging.set_verbosity_error()

logger = logging.get_logger(__name__)

CONFIG_NAME = 'config.yaml'
WEIGHTS_NAME = 'pytorch_model.bin'
VOCAB_NAME = 'vocab.txt'

_CONFIG_FOR_DOC = 'BertConfig'
_TOKENIZER_FOR_DOC = 'BertTokenizer'
@@ -1733,7 +1732,7 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel):
super().__init__(config)
self.config = config
self.tokenizer = BertTokenizer.from_pretrained(
os.path.join(config.model_dir, VOCAB_NAME))
os.path.join(config.model_dir, ModelFile.VOCAB_FILE))
self.module_setting(config)
self.visual_encoder = self._initialize_clip(config)
self.text_encoder = BertModel(
@@ -1751,7 +1750,8 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel):
config.model_dir = model_dir
model = cls(config)
if load_checkpoint:
checkpoint_path = os.path.join(model_dir, WEIGHTS_NAME)
checkpoint_path = os.path.join(model_dir,
ModelFile.TORCH_MODEL_BIN_FILE)
checkpoint = torch.load(checkpoint_path, map_location='cpu')
if 'model' in checkpoint:
state_dict = checkpoint['model']


+ 3
- 1
modelscope/models/multi_modal/ofa/tokenization_ofa.py View File

@@ -22,6 +22,8 @@ from transformers.models.bert.tokenization_bert import (BasicTokenizer,
WordpieceTokenizer)
from transformers.utils import logging

from modelscope.utils.constant import ModelFile

logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {'vocab_file': 'vocab.json', 'merges_file': 'merges.txt'}
@@ -42,7 +44,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
'ofa-base': 1024,
}

VOCAB_FILES_NAMES_ZH = {'vocab_file': 'vocab.txt'}
VOCAB_FILES_NAMES_ZH = {'vocab_file': ModelFile.VOCAB_FILE}

PRETRAINED_VOCAB_FILES_MAP_ZH = {
'vocab_file': {


+ 2
- 1
modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py View File

@@ -20,6 +20,7 @@ from transformers import PreTrainedTokenizerFast
from transformers.models.bart.tokenization_bart_fast import BartTokenizerFast
from transformers.utils import logging

from modelscope.utils.constant import ModelFile
from .tokenization_ofa import OFATokenizer, OFATokenizerZH

logger = logging.get_logger(__name__)
@@ -50,7 +51,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
'ofa-base': 1024,
}

VOCAB_FILES_NAMES_ZH = {'vocab_file': 'vocab.txt'}
VOCAB_FILES_NAMES_ZH = {'vocab_file': ModelFile.VOCAB_FILE}

PRETRAINED_VOCAB_FILES_MAP_ZH = {
'vocab_file': {


+ 2
- 1
modelscope/models/nlp/structbert/tokenization_sbert.py View File

@@ -23,11 +23,12 @@ from typing import List, Optional, Tuple
from transformers.tokenization_utils import (PreTrainedTokenizer, _is_control,
_is_punctuation, _is_whitespace)

from modelscope.utils.constant import ModelFile
from modelscope.utils.logger import get_logger

logger = get_logger(__name__)

VOCAB_FILES_NAMES = {'vocab_file': 'vocab.txt'}
VOCAB_FILES_NAMES = {'vocab_file': ModelFile.VOCAB_FILE}

PRETRAINED_VOCAB_FILES_MAP = {'vocab_file': {}}



+ 2
- 1
modelscope/models/nlp/structbert/tokenization_sbert_fast.py View File

@@ -22,13 +22,14 @@ import transformers
from tokenizers import normalizers
from transformers.tokenization_utils_fast import PreTrainedTokenizerFast

from modelscope.utils.constant import ModelFile
from modelscope.utils.logger import get_logger
from .tokenization_sbert import SbertTokenizer

logger = get_logger(__name__)

VOCAB_FILES_NAMES = {
'vocab_file': 'vocab.txt',
'vocab_file': ModelFile.VOCAB_FILE,
'tokenizer_file': 'tokenizer.json'
}



+ 3
- 3
modelscope/preprocessors/multi_modal.py View File

@@ -26,7 +26,7 @@ __all__ = [
class OfaPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path
@@ -97,13 +97,13 @@ class MPlugVisualQuestionAnsweringPreprocessor(Preprocessor):

"""
from transformers import BertTokenizer
from modelscope.models.multi_modal.mplug import CONFIG_NAME, VOCAB_NAME, MPlugConfig
from modelscope.models.multi_modal.mplug import CONFIG_NAME, MPlugConfig

super().__init__(*args, **kwargs)

# tokenizer
self.tokenizer = BertTokenizer.from_pretrained(
osp.join(model_dir, VOCAB_NAME))
osp.join(model_dir, ModelFile.VOCAB_FILE))

# load configuration
config = MPlugConfig.from_yaml_file(osp.join(model_dir, CONFIG_NAME))


+ 4
- 4
modelscope/preprocessors/nlp.py View File

@@ -44,7 +44,7 @@ class Tokenize(Preprocessor):
class SequenceClassificationPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path
@@ -291,7 +291,7 @@ class ZeroShotClassificationPreprocessor(NLPTokenizerPreprocessorBase):
"""

def __init__(self, model_dir: str, mode=ModeKeys.INFERENCE, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path
@@ -522,7 +522,7 @@ class NERPreprocessor(Preprocessor):
"""

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path
@@ -614,7 +614,7 @@ class TextErrorCorrectionPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
from fairseq.data import Dictionary
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data via the vocab file from the `model_dir` path

Args:
model_dir (str): model path


+ 1
- 1
modelscope/preprocessors/ofa/base.py View File

@@ -14,7 +14,7 @@ from .utils.random_help import set_torch_seed
class OfaBasePreprocessor:

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/image_captioning.py View File

@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/image_classification.py View File

@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
class OfaImageClassificationPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/summarization.py View File

@@ -7,7 +7,7 @@ from .base import OfaBasePreprocessor
class OfaSummarizationPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/text_classification.py View File

@@ -7,7 +7,7 @@ from .base import OfaBasePreprocessor
class OfaTextClassificationPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/text_to_image_synthesis.py View File

@@ -9,7 +9,7 @@ from .base import OfaBasePreprocessor
class OfaTextToImageSynthesisPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path


+ 1
- 1
modelscope/preprocessors/ofa/visual_entailment.py View File

@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
class OfaVisualEntailmentPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/visual_grounding.py View File

@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/ofa/visual_question_answering.py View File

@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
class OfaVisualQuestionAnsweringPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config


+ 1
- 1
modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py View File

@@ -22,7 +22,7 @@ __all__ = ['DialogIntentPredictionPreprocessor']
class DialogIntentPredictionPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path


+ 1
- 1
modelscope/preprocessors/space/dialog_modeling_preprocessor.py View File

@@ -20,7 +20,7 @@ __all__ = ['DialogModelingPreprocessor']
class DialogModelingPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path


+ 1
- 1
modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py View File

@@ -17,7 +17,7 @@ __all__ = ['DialogStateTrackingPreprocessor']
class DialogStateTrackingPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path


+ 2
- 1
modelscope/preprocessors/space/fields/gen_field.py View File

@@ -8,6 +8,7 @@ from itertools import chain
import numpy as np

from modelscope.preprocessors.space.tokenizer import Tokenizer
from modelscope.utils.constant import ModelFile
from modelscope.utils.logger import get_logger
from modelscope.utils.nlp.space import ontology, utils
from modelscope.utils.nlp.space.db_ops import MultiWozDB
@@ -343,7 +344,7 @@ class MultiWOZBPETextField(BPETextField):
]
special_tokens.extend(self.add_sepcial_tokens())
self.tokenizer = Tokenizer(
vocab_path=os.path.join(model_dir, 'vocab.txt'),
vocab_path=os.path.join(model_dir, ModelFile.VOCAB_FILE),
special_tokens=special_tokens,
tokenizer_type=config.BPETextField.tokenizer_type)
self.understand_ids = self.tokenizer.convert_tokens_to_ids(


+ 2
- 1
modelscope/preprocessors/space/fields/intent_field.py View File

@@ -14,6 +14,7 @@ import numpy as np
from tqdm import tqdm

from modelscope.preprocessors.space.tokenizer import Tokenizer
from modelscope.utils.constant import ModelFile
from modelscope.utils.nlp.space import ontology
from modelscope.utils.nlp.space.scores import hierarchical_set_score
from modelscope.utils.nlp.space.utils import list2np
@@ -50,7 +51,7 @@ class BPETextField(object):
]
special_tokens.extend(self.add_sepcial_tokens())
self.tokenizer = Tokenizer(
vocab_path=os.path.join(model_dir, 'vocab.txt'),
vocab_path=os.path.join(model_dir, ModelFile.VOCAB_FILE),
special_tokens=special_tokens,
tokenizer_type=config.BPETextField.tokenizer_type)
self.understand_ids = self.numericalize(self.understand_tokens)


+ 1
- 1
modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py View File

@@ -28,7 +28,7 @@ __all__ = ['ConversationalTextToSqlPreprocessor']
class ConversationalTextToSqlPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path
"""preprocess the data

Args:
model_dir (str): model path


+ 2
- 0
modelscope/utils/constant.py View File

@@ -203,6 +203,8 @@ class ModelFile(object):
TF_CKPT_PREFIX = 'ckpt-'
TORCH_MODEL_FILE = 'pytorch_model.pt'
TORCH_MODEL_BIN_FILE = 'pytorch_model.bin'
VOCAB_FILE = 'vocab.txt'
ONNX_MODEL_FILE = 'model.onnx'
LABEL_MAPPING = 'label_mapping.json'




Loading…
Cancel
Save