From f53b24233211d2af44494546420ec25e48811c13 Mon Sep 17 00:00:00 2001 From: Yingda Chen Date: Tue, 16 Aug 2022 13:33:44 +0800 Subject: [PATCH] [to #42322933] add onnx model and onnx constant --- modelscope/models/multi_modal/clip/clip_model.py | 4 ++-- modelscope/models/multi_modal/diffusion/model.py | 2 +- modelscope/models/multi_modal/mplug/__init__.py | 3 +-- modelscope/models/multi_modal/mplug/modeling_mplug.py | 8 ++++---- modelscope/models/multi_modal/ofa/tokenization_ofa.py | 4 +++- .../models/multi_modal/ofa/tokenization_ofa_fast.py | 3 ++- modelscope/models/nlp/structbert/tokenization_sbert.py | 3 ++- .../models/nlp/structbert/tokenization_sbert_fast.py | 3 ++- modelscope/preprocessors/multi_modal.py | 6 +++--- modelscope/preprocessors/nlp.py | 8 ++++---- modelscope/preprocessors/ofa/base.py | 2 +- modelscope/preprocessors/ofa/image_captioning.py | 2 +- modelscope/preprocessors/ofa/image_classification.py | 2 +- modelscope/preprocessors/ofa/summarization.py | 2 +- modelscope/preprocessors/ofa/text_classification.py | 2 +- modelscope/preprocessors/ofa/text_to_image_synthesis.py | 2 +- modelscope/preprocessors/ofa/visual_entailment.py | 2 +- modelscope/preprocessors/ofa/visual_grounding.py | 2 +- modelscope/preprocessors/ofa/visual_question_answering.py | 2 +- .../space/dialog_intent_prediction_preprocessor.py | 2 +- .../preprocessors/space/dialog_modeling_preprocessor.py | 2 +- .../space/dialog_state_tracking_preprocessor.py | 2 +- modelscope/preprocessors/space/fields/gen_field.py | 3 ++- modelscope/preprocessors/space/fields/intent_field.py | 3 ++- .../star/conversational_text_to_sql_preprocessor.py | 2 +- modelscope/utils/constant.py | 2 ++ 26 files changed, 43 insertions(+), 35 deletions(-) diff --git a/modelscope/models/multi_modal/clip/clip_model.py b/modelscope/models/multi_modal/clip/clip_model.py index e092f4af..738057ce 100644 --- a/modelscope/models/multi_modal/clip/clip_model.py +++ b/modelscope/models/multi_modal/clip/clip_model.py @@ -17,7 +17,7 @@ from modelscope.models import TorchModel from modelscope.models.builder import MODELS from modelscope.models.multi_modal.clip.clip_bert import TextTransformer from modelscope.models.multi_modal.clip.clip_vit import VisionTransformer -from modelscope.utils.constant import ModeKeys, Tasks +from modelscope.utils.constant import ModeKeys, ModelFile, Tasks from modelscope.utils.logger import get_logger logger = get_logger() @@ -143,7 +143,7 @@ class CLIPForMultiModalEmbedding(TorchModel): ]) # text tokenizer - vocab_path = '{}/vocab.txt'.format(model_dir) + vocab_path = f'{model_dir}/{ModelFile.VOCAB_FILE}' self.text_tokenizer = BertWordPieceTokenizer( vocab_path, lowercase=False) self.text_tokenizer.enable_truncation(max_length=30) diff --git a/modelscope/models/multi_modal/diffusion/model.py b/modelscope/models/multi_modal/diffusion/model.py index 4d61e2d1..8617b8dd 100644 --- a/modelscope/models/multi_modal/diffusion/model.py +++ b/modelscope/models/multi_modal/diffusion/model.py @@ -136,7 +136,7 @@ class DiffusionForTextToImageSynthesis(Model): self.unet_upsampler_1024 = diffusion_model.unet_upsampler_1024 # text tokenizer - vocab_path = '{}/vocab.txt'.format(model_dir) + vocab_path = f'{model_dir}/{ModelFile.VOCAB_FILE}' self.tokenizer = Tokenizer(vocab_file=vocab_path, seq_len=64) # diffusion process diff --git a/modelscope/models/multi_modal/mplug/__init__.py b/modelscope/models/multi_modal/mplug/__init__.py index bca5849b..a145fc0c 100644 --- a/modelscope/models/multi_modal/mplug/__init__.py +++ b/modelscope/models/multi_modal/mplug/__init__.py @@ -14,5 +14,4 @@ # limitations under the License. from .configuration_mplug import MPlugConfig -from .modeling_mplug import (CONFIG_NAME, VOCAB_NAME, - MPlugForVisualQuestionAnswering) +from .modeling_mplug import CONFIG_NAME, MPlugForVisualQuestionAnswering diff --git a/modelscope/models/multi_modal/mplug/modeling_mplug.py b/modelscope/models/multi_modal/mplug/modeling_mplug.py index 0b45ea12..79fab718 100755 --- a/modelscope/models/multi_modal/mplug/modeling_mplug.py +++ b/modelscope/models/multi_modal/mplug/modeling_mplug.py @@ -42,14 +42,13 @@ from transformers.utils import logging from modelscope.models.multi_modal.mplug.configuration_mplug import MPlugConfig from modelscope.models.multi_modal.mplug.predictor import TextGenerator +from modelscope.utils.constant import ModelFile transformers.logging.set_verbosity_error() logger = logging.get_logger(__name__) CONFIG_NAME = 'config.yaml' -WEIGHTS_NAME = 'pytorch_model.bin' -VOCAB_NAME = 'vocab.txt' _CONFIG_FOR_DOC = 'BertConfig' _TOKENIZER_FOR_DOC = 'BertTokenizer' @@ -1733,7 +1732,7 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel): super().__init__(config) self.config = config self.tokenizer = BertTokenizer.from_pretrained( - os.path.join(config.model_dir, VOCAB_NAME)) + os.path.join(config.model_dir, ModelFile.VOCAB_FILE)) self.module_setting(config) self.visual_encoder = self._initialize_clip(config) self.text_encoder = BertModel( @@ -1751,7 +1750,8 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel): config.model_dir = model_dir model = cls(config) if load_checkpoint: - checkpoint_path = os.path.join(model_dir, WEIGHTS_NAME) + checkpoint_path = os.path.join(model_dir, + ModelFile.TORCH_MODEL_BIN_FILE) checkpoint = torch.load(checkpoint_path, map_location='cpu') if 'model' in checkpoint: state_dict = checkpoint['model'] diff --git a/modelscope/models/multi_modal/ofa/tokenization_ofa.py b/modelscope/models/multi_modal/ofa/tokenization_ofa.py index 158905eb..fd50505c 100644 --- a/modelscope/models/multi_modal/ofa/tokenization_ofa.py +++ b/modelscope/models/multi_modal/ofa/tokenization_ofa.py @@ -22,6 +22,8 @@ from transformers.models.bert.tokenization_bert import (BasicTokenizer, WordpieceTokenizer) from transformers.utils import logging +from modelscope.utils.constant import ModelFile + logger = logging.get_logger(__name__) VOCAB_FILES_NAMES = {'vocab_file': 'vocab.json', 'merges_file': 'merges.txt'} @@ -42,7 +44,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 'ofa-base': 1024, } -VOCAB_FILES_NAMES_ZH = {'vocab_file': 'vocab.txt'} +VOCAB_FILES_NAMES_ZH = {'vocab_file': ModelFile.VOCAB_FILE} PRETRAINED_VOCAB_FILES_MAP_ZH = { 'vocab_file': { diff --git a/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py b/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py index 03d2d71e..db11370d 100644 --- a/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py +++ b/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py @@ -20,6 +20,7 @@ from transformers import PreTrainedTokenizerFast from transformers.models.bart.tokenization_bart_fast import BartTokenizerFast from transformers.utils import logging +from modelscope.utils.constant import ModelFile from .tokenization_ofa import OFATokenizer, OFATokenizerZH logger = logging.get_logger(__name__) @@ -50,7 +51,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 'ofa-base': 1024, } -VOCAB_FILES_NAMES_ZH = {'vocab_file': 'vocab.txt'} +VOCAB_FILES_NAMES_ZH = {'vocab_file': ModelFile.VOCAB_FILE} PRETRAINED_VOCAB_FILES_MAP_ZH = { 'vocab_file': { diff --git a/modelscope/models/nlp/structbert/tokenization_sbert.py b/modelscope/models/nlp/structbert/tokenization_sbert.py index cbf98746..3171e31d 100644 --- a/modelscope/models/nlp/structbert/tokenization_sbert.py +++ b/modelscope/models/nlp/structbert/tokenization_sbert.py @@ -23,11 +23,12 @@ from typing import List, Optional, Tuple from transformers.tokenization_utils import (PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace) +from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger logger = get_logger(__name__) -VOCAB_FILES_NAMES = {'vocab_file': 'vocab.txt'} +VOCAB_FILES_NAMES = {'vocab_file': ModelFile.VOCAB_FILE} PRETRAINED_VOCAB_FILES_MAP = {'vocab_file': {}} diff --git a/modelscope/models/nlp/structbert/tokenization_sbert_fast.py b/modelscope/models/nlp/structbert/tokenization_sbert_fast.py index 5b8d79cc..a0a81121 100644 --- a/modelscope/models/nlp/structbert/tokenization_sbert_fast.py +++ b/modelscope/models/nlp/structbert/tokenization_sbert_fast.py @@ -22,13 +22,14 @@ import transformers from tokenizers import normalizers from transformers.tokenization_utils_fast import PreTrainedTokenizerFast +from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger from .tokenization_sbert import SbertTokenizer logger = get_logger(__name__) VOCAB_FILES_NAMES = { - 'vocab_file': 'vocab.txt', + 'vocab_file': ModelFile.VOCAB_FILE, 'tokenizer_file': 'tokenizer.json' } diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index 65578e6a..7665e8b7 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -26,7 +26,7 @@ __all__ = [ class OfaPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path @@ -97,13 +97,13 @@ class MPlugVisualQuestionAnsweringPreprocessor(Preprocessor): """ from transformers import BertTokenizer - from modelscope.models.multi_modal.mplug import CONFIG_NAME, VOCAB_NAME, MPlugConfig + from modelscope.models.multi_modal.mplug import CONFIG_NAME, MPlugConfig super().__init__(*args, **kwargs) # tokenizer self.tokenizer = BertTokenizer.from_pretrained( - osp.join(model_dir, VOCAB_NAME)) + osp.join(model_dir, ModelFile.VOCAB_FILE)) # load configuration config = MPlugConfig.from_yaml_file(osp.join(model_dir, CONFIG_NAME)) diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 8bf9943c..25576667 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -44,7 +44,7 @@ class Tokenize(Preprocessor): class SequenceClassificationPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path @@ -291,7 +291,7 @@ class ZeroShotClassificationPreprocessor(NLPTokenizerPreprocessorBase): """ def __init__(self, model_dir: str, mode=ModeKeys.INFERENCE, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path @@ -522,7 +522,7 @@ class NERPreprocessor(Preprocessor): """ def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path @@ -614,7 +614,7 @@ class TextErrorCorrectionPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): from fairseq.data import Dictionary - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data via the vocab file from the `model_dir` path Args: model_dir (str): model path diff --git a/modelscope/preprocessors/ofa/base.py b/modelscope/preprocessors/ofa/base.py index fb9d06cd..691f8b36 100644 --- a/modelscope/preprocessors/ofa/base.py +++ b/modelscope/preprocessors/ofa/base.py @@ -14,7 +14,7 @@ from .utils.random_help import set_torch_seed class OfaBasePreprocessor: def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/image_captioning.py b/modelscope/preprocessors/ofa/image_captioning.py index 264c8e04..318a8a6d 100644 --- a/modelscope/preprocessors/ofa/image_captioning.py +++ b/modelscope/preprocessors/ofa/image_captioning.py @@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/image_classification.py b/modelscope/preprocessors/ofa/image_classification.py index 30289613..dd2de634 100644 --- a/modelscope/preprocessors/ofa/image_classification.py +++ b/modelscope/preprocessors/ofa/image_classification.py @@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor class OfaImageClassificationPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/summarization.py b/modelscope/preprocessors/ofa/summarization.py index fd5113cd..99028e61 100644 --- a/modelscope/preprocessors/ofa/summarization.py +++ b/modelscope/preprocessors/ofa/summarization.py @@ -7,7 +7,7 @@ from .base import OfaBasePreprocessor class OfaSummarizationPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/text_classification.py b/modelscope/preprocessors/ofa/text_classification.py index 1a3f84fd..5673a07f 100644 --- a/modelscope/preprocessors/ofa/text_classification.py +++ b/modelscope/preprocessors/ofa/text_classification.py @@ -7,7 +7,7 @@ from .base import OfaBasePreprocessor class OfaTextClassificationPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/text_to_image_synthesis.py b/modelscope/preprocessors/ofa/text_to_image_synthesis.py index 9dbba921..938f50de 100644 --- a/modelscope/preprocessors/ofa/text_to_image_synthesis.py +++ b/modelscope/preprocessors/ofa/text_to_image_synthesis.py @@ -9,7 +9,7 @@ from .base import OfaBasePreprocessor class OfaTextToImageSynthesisPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path diff --git a/modelscope/preprocessors/ofa/visual_entailment.py b/modelscope/preprocessors/ofa/visual_entailment.py index 72e88d75..6002c4a6 100644 --- a/modelscope/preprocessors/ofa/visual_entailment.py +++ b/modelscope/preprocessors/ofa/visual_entailment.py @@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor class OfaVisualEntailmentPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/visual_grounding.py b/modelscope/preprocessors/ofa/visual_grounding.py index eebc4cf2..022e5788 100644 --- a/modelscope/preprocessors/ofa/visual_grounding.py +++ b/modelscope/preprocessors/ofa/visual_grounding.py @@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor class OfaVisualGroundingPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/ofa/visual_question_answering.py b/modelscope/preprocessors/ofa/visual_question_answering.py index b11af9f6..d34d1db0 100644 --- a/modelscope/preprocessors/ofa/visual_question_answering.py +++ b/modelscope/preprocessors/ofa/visual_question_answering.py @@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor class OfaVisualQuestionAnsweringPreprocessor(OfaBasePreprocessor): def __init__(self, cfg, model_dir): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: cfg(modelscope.utils.config.ConfigDict) : model config diff --git a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py index c7339538..e2602eaa 100644 --- a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py @@ -22,7 +22,7 @@ __all__ = ['DialogIntentPredictionPreprocessor'] class DialogIntentPredictionPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path diff --git a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py index 8ed97452..a2157c2b 100644 --- a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py @@ -20,7 +20,7 @@ __all__ = ['DialogModelingPreprocessor'] class DialogModelingPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path diff --git a/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py b/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py index 038ab09b..6eb17288 100644 --- a/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py @@ -17,7 +17,7 @@ __all__ = ['DialogStateTrackingPreprocessor'] class DialogStateTrackingPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path diff --git a/modelscope/preprocessors/space/fields/gen_field.py b/modelscope/preprocessors/space/fields/gen_field.py index f924588c..5bff360f 100644 --- a/modelscope/preprocessors/space/fields/gen_field.py +++ b/modelscope/preprocessors/space/fields/gen_field.py @@ -8,6 +8,7 @@ from itertools import chain import numpy as np from modelscope.preprocessors.space.tokenizer import Tokenizer +from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger from modelscope.utils.nlp.space import ontology, utils from modelscope.utils.nlp.space.db_ops import MultiWozDB @@ -343,7 +344,7 @@ class MultiWOZBPETextField(BPETextField): ] special_tokens.extend(self.add_sepcial_tokens()) self.tokenizer = Tokenizer( - vocab_path=os.path.join(model_dir, 'vocab.txt'), + vocab_path=os.path.join(model_dir, ModelFile.VOCAB_FILE), special_tokens=special_tokens, tokenizer_type=config.BPETextField.tokenizer_type) self.understand_ids = self.tokenizer.convert_tokens_to_ids( diff --git a/modelscope/preprocessors/space/fields/intent_field.py b/modelscope/preprocessors/space/fields/intent_field.py index 4ed7ab6c..dc00e677 100644 --- a/modelscope/preprocessors/space/fields/intent_field.py +++ b/modelscope/preprocessors/space/fields/intent_field.py @@ -14,6 +14,7 @@ import numpy as np from tqdm import tqdm from modelscope.preprocessors.space.tokenizer import Tokenizer +from modelscope.utils.constant import ModelFile from modelscope.utils.nlp.space import ontology from modelscope.utils.nlp.space.scores import hierarchical_set_score from modelscope.utils.nlp.space.utils import list2np @@ -50,7 +51,7 @@ class BPETextField(object): ] special_tokens.extend(self.add_sepcial_tokens()) self.tokenizer = Tokenizer( - vocab_path=os.path.join(model_dir, 'vocab.txt'), + vocab_path=os.path.join(model_dir, ModelFile.VOCAB_FILE), special_tokens=special_tokens, tokenizer_type=config.BPETextField.tokenizer_type) self.understand_ids = self.numericalize(self.understand_tokens) diff --git a/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py b/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py index 2032dcf7..b5dd73a9 100644 --- a/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py +++ b/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py @@ -28,7 +28,7 @@ __all__ = ['ConversationalTextToSqlPreprocessor'] class ConversationalTextToSqlPreprocessor(Preprocessor): def __init__(self, model_dir: str, *args, **kwargs): - """preprocess the data via the vocab.txt from the `model_dir` path + """preprocess the data Args: model_dir (str): model path diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 5f327ddc..f2d69198 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -203,6 +203,8 @@ class ModelFile(object): TF_CKPT_PREFIX = 'ckpt-' TORCH_MODEL_FILE = 'pytorch_model.pt' TORCH_MODEL_BIN_FILE = 'pytorch_model.bin' + VOCAB_FILE = 'vocab.txt' + ONNX_MODEL_FILE = 'model.onnx' LABEL_MAPPING = 'label_mapping.json'