@@ -42,14 +42,13 @@ from transformers.utils import logging
 
 from modelscope.models.multi_modal.mplug.configuration_mplug import MPlugConfig
 from modelscope.models.multi_modal.mplug.predictor import TextGenerator
+from modelscope.utils.constant import ModelFile
 
 transformers.logging.set_verbosity_error()
 
 logger = logging.get_logger(__name__)
 
 CONFIG_NAME = 'config.yaml'
-WEIGHTS_NAME = 'pytorch_model.bin'
-VOCAB_NAME = 'vocab.txt'
 
 _CONFIG_FOR_DOC = 'BertConfig'
 _TOKENIZER_FOR_DOC = 'BertTokenizer'
@@ -1733,7 +1732,7 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel):
         super().__init__(config)
         self.config = config
         self.tokenizer = BertTokenizer.from_pretrained(
-            os.path.join(config.model_dir, VOCAB_NAME))
+            os.path.join(config.model_dir, ModelFile.VOCAB_FILE))
         self.module_setting(config)
         self.visual_encoder = self._initialize_clip(config)
         self.text_encoder = BertModel(
@@ -1751,7 +1750,8 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel):
         config.model_dir = model_dir
         model = cls(config)
         if load_checkpoint:
-            checkpoint_path = os.path.join(model_dir, WEIGHTS_NAME)
+            checkpoint_path = os.path.join(model_dir,
+                                           ModelFile.TORCH_MODEL_BIN_FILE)
             checkpoint = torch.load(checkpoint_path, map_location='cpu')
             if 'model' in checkpoint:
                 state_dict = checkpoint['model']
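
Note (not part of the patch): the diff replaces the locally defined file-name constants with the shared ones from modelscope.utils.constant. A minimal sketch of the assumption behind that swap, using a placeholder model directory, is:

import os

from modelscope.utils.constant import ModelFile

# Placeholder directory; any mPLUG model dir laid out as before works the same way.
model_dir = '/path/to/mplug_model_dir'

# The shared constants are expected to name the same files the removed
# WEIGHTS_NAME / VOCAB_NAME strings hard-coded ('pytorch_model.bin' / 'vocab.txt'),
# so existing checkpoint directories keep resolving to the same paths.
checkpoint_path = os.path.join(model_dir, ModelFile.TORCH_MODEL_BIN_FILE)
vocab_path = os.path.join(model_dir, ModelFile.VOCAB_FILE)
print(checkpoint_path)
print(vocab_path)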