From f53b24233211d2af44494546420ec25e48811c13 Mon Sep 17 00:00:00 2001
From: Yingda Chen <yingda.chen@alibaba-inc.com>
Date: Tue, 16 Aug 2022 13:33:44 +0800
Subject: [PATCH] [to #42322933] add vocab file and onnx model file constants

---
 modelscope/models/multi_modal/clip/clip_model.py          | 4 ++--
 modelscope/models/multi_modal/diffusion/model.py          | 2 +-
 modelscope/models/multi_modal/mplug/__init__.py           | 3 +--
 modelscope/models/multi_modal/mplug/modeling_mplug.py     | 8 ++++----
 modelscope/models/multi_modal/ofa/tokenization_ofa.py     | 4 +++-
 .../models/multi_modal/ofa/tokenization_ofa_fast.py       | 3 ++-
 modelscope/models/nlp/structbert/tokenization_sbert.py    | 3 ++-
 .../models/nlp/structbert/tokenization_sbert_fast.py      | 3 ++-
 modelscope/preprocessors/multi_modal.py                   | 6 +++---
 modelscope/preprocessors/nlp.py                           | 8 ++++----
 modelscope/preprocessors/ofa/base.py                      | 2 +-
 modelscope/preprocessors/ofa/image_captioning.py          | 2 +-
 modelscope/preprocessors/ofa/image_classification.py      | 2 +-
 modelscope/preprocessors/ofa/summarization.py             | 2 +-
 modelscope/preprocessors/ofa/text_classification.py       | 2 +-
 modelscope/preprocessors/ofa/text_to_image_synthesis.py   | 2 +-
 modelscope/preprocessors/ofa/visual_entailment.py         | 2 +-
 modelscope/preprocessors/ofa/visual_grounding.py          | 2 +-
 modelscope/preprocessors/ofa/visual_question_answering.py | 2 +-
 .../space/dialog_intent_prediction_preprocessor.py        | 2 +-
 .../preprocessors/space/dialog_modeling_preprocessor.py   | 2 +-
 .../space/dialog_state_tracking_preprocessor.py           | 2 +-
 modelscope/preprocessors/space/fields/gen_field.py        | 3 ++-
 modelscope/preprocessors/space/fields/intent_field.py     | 3 ++-
 .../star/conversational_text_to_sql_preprocessor.py       | 2 +-
 modelscope/utils/constant.py                              | 2 ++
 26 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/modelscope/models/multi_modal/clip/clip_model.py b/modelscope/models/multi_modal/clip/clip_model.py
index e092f4af..738057ce 100644
--- a/modelscope/models/multi_modal/clip/clip_model.py
+++ b/modelscope/models/multi_modal/clip/clip_model.py
@@ -17,7 +17,7 @@ from modelscope.models import TorchModel
 from modelscope.models.builder import MODELS
 from modelscope.models.multi_modal.clip.clip_bert import TextTransformer
 from modelscope.models.multi_modal.clip.clip_vit import VisionTransformer
-from modelscope.utils.constant import ModeKeys, Tasks
+from modelscope.utils.constant import ModeKeys, ModelFile, Tasks
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -143,7 +143,7 @@ class CLIPForMultiModalEmbedding(TorchModel):
         ])
 
         # text tokenizer
-        vocab_path = '{}/vocab.txt'.format(model_dir)
+        vocab_path = f'{model_dir}/{ModelFile.VOCAB_FILE}'
         self.text_tokenizer = BertWordPieceTokenizer(
             vocab_path, lowercase=False)
         self.text_tokenizer.enable_truncation(max_length=30)
diff --git a/modelscope/models/multi_modal/diffusion/model.py b/modelscope/models/multi_modal/diffusion/model.py
index 4d61e2d1..8617b8dd 100644
--- a/modelscope/models/multi_modal/diffusion/model.py
+++ b/modelscope/models/multi_modal/diffusion/model.py
@@ -136,7 +136,7 @@ class DiffusionForTextToImageSynthesis(Model):
         self.unet_upsampler_1024 = diffusion_model.unet_upsampler_1024
 
         # text tokenizer
-        vocab_path = '{}/vocab.txt'.format(model_dir)
+        vocab_path = f'{model_dir}/{ModelFile.VOCAB_FILE}'
         self.tokenizer = Tokenizer(vocab_file=vocab_path, seq_len=64)
 
         # diffusion process
diff --git a/modelscope/models/multi_modal/mplug/__init__.py b/modelscope/models/multi_modal/mplug/__init__.py
index bca5849b..a145fc0c 100644
--- a/modelscope/models/multi_modal/mplug/__init__.py
+++ b/modelscope/models/multi_modal/mplug/__init__.py
@@ -14,5 +14,4 @@
 # limitations under the License.
 
 from .configuration_mplug import MPlugConfig
-from .modeling_mplug import (CONFIG_NAME, VOCAB_NAME,
-                             MPlugForVisualQuestionAnswering)
+from .modeling_mplug import CONFIG_NAME, MPlugForVisualQuestionAnswering
diff --git a/modelscope/models/multi_modal/mplug/modeling_mplug.py b/modelscope/models/multi_modal/mplug/modeling_mplug.py
index 0b45ea12..79fab718 100755
--- a/modelscope/models/multi_modal/mplug/modeling_mplug.py
+++ b/modelscope/models/multi_modal/mplug/modeling_mplug.py
@@ -42,14 +42,13 @@ from transformers.utils import logging
 
 from modelscope.models.multi_modal.mplug.configuration_mplug import MPlugConfig
 from modelscope.models.multi_modal.mplug.predictor import TextGenerator
+from modelscope.utils.constant import ModelFile
 
 transformers.logging.set_verbosity_error()
 
 logger = logging.get_logger(__name__)
 
 CONFIG_NAME = 'config.yaml'
-WEIGHTS_NAME = 'pytorch_model.bin'
-VOCAB_NAME = 'vocab.txt'
 
 _CONFIG_FOR_DOC = 'BertConfig'
 _TOKENIZER_FOR_DOC = 'BertTokenizer'
@@ -1733,7 +1732,7 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel):
         super().__init__(config)
         self.config = config
         self.tokenizer = BertTokenizer.from_pretrained(
-            os.path.join(config.model_dir, VOCAB_NAME))
+            os.path.join(config.model_dir, ModelFile.VOCAB_FILE))
         self.module_setting(config)
         self.visual_encoder = self._initialize_clip(config)
         self.text_encoder = BertModel(
@@ -1751,7 +1750,8 @@ class MPlugForVisualQuestionAnswering(PreTrainedModel):
         config.model_dir = model_dir
         model = cls(config)
         if load_checkpoint:
-            checkpoint_path = os.path.join(model_dir, WEIGHTS_NAME)
+            checkpoint_path = os.path.join(model_dir,
+                                           ModelFile.TORCH_MODEL_BIN_FILE)
             checkpoint = torch.load(checkpoint_path, map_location='cpu')
             if 'model' in checkpoint:
                 state_dict = checkpoint['model']
diff --git a/modelscope/models/multi_modal/ofa/tokenization_ofa.py b/modelscope/models/multi_modal/ofa/tokenization_ofa.py
index 158905eb..fd50505c 100644
--- a/modelscope/models/multi_modal/ofa/tokenization_ofa.py
+++ b/modelscope/models/multi_modal/ofa/tokenization_ofa.py
@@ -22,6 +22,8 @@ from transformers.models.bert.tokenization_bert import (BasicTokenizer,
                                                         WordpieceTokenizer)
 from transformers.utils import logging
 
+from modelscope.utils.constant import ModelFile
+
 logger = logging.get_logger(__name__)
 
 VOCAB_FILES_NAMES = {'vocab_file': 'vocab.json', 'merges_file': 'merges.txt'}
@@ -42,7 +44,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
     'ofa-base': 1024,
 }
 
-VOCAB_FILES_NAMES_ZH = {'vocab_file': 'vocab.txt'}
+VOCAB_FILES_NAMES_ZH = {'vocab_file': ModelFile.VOCAB_FILE}
 
 PRETRAINED_VOCAB_FILES_MAP_ZH = {
     'vocab_file': {
diff --git a/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py b/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py
index 03d2d71e..db11370d 100644
--- a/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py
+++ b/modelscope/models/multi_modal/ofa/tokenization_ofa_fast.py
@@ -20,6 +20,7 @@ from transformers import PreTrainedTokenizerFast
 from transformers.models.bart.tokenization_bart_fast import BartTokenizerFast
 from transformers.utils import logging
 
+from modelscope.utils.constant import ModelFile
 from .tokenization_ofa import OFATokenizer, OFATokenizerZH
 
 logger = logging.get_logger(__name__)
@@ -50,7 +51,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
     'ofa-base': 1024,
 }
 
-VOCAB_FILES_NAMES_ZH = {'vocab_file': 'vocab.txt'}
+VOCAB_FILES_NAMES_ZH = {'vocab_file': ModelFile.VOCAB_FILE}
 
 PRETRAINED_VOCAB_FILES_MAP_ZH = {
     'vocab_file': {
diff --git a/modelscope/models/nlp/structbert/tokenization_sbert.py b/modelscope/models/nlp/structbert/tokenization_sbert.py
index cbf98746..3171e31d 100644
--- a/modelscope/models/nlp/structbert/tokenization_sbert.py
+++ b/modelscope/models/nlp/structbert/tokenization_sbert.py
@@ -23,11 +23,12 @@ from typing import List, Optional, Tuple
 from transformers.tokenization_utils import (PreTrainedTokenizer, _is_control,
                                              _is_punctuation, _is_whitespace)
 
+from modelscope.utils.constant import ModelFile
 from modelscope.utils.logger import get_logger
 
 logger = get_logger(__name__)
 
-VOCAB_FILES_NAMES = {'vocab_file': 'vocab.txt'}
+VOCAB_FILES_NAMES = {'vocab_file': ModelFile.VOCAB_FILE}
 
 PRETRAINED_VOCAB_FILES_MAP = {'vocab_file': {}}
 
diff --git a/modelscope/models/nlp/structbert/tokenization_sbert_fast.py b/modelscope/models/nlp/structbert/tokenization_sbert_fast.py
index 5b8d79cc..a0a81121 100644
--- a/modelscope/models/nlp/structbert/tokenization_sbert_fast.py
+++ b/modelscope/models/nlp/structbert/tokenization_sbert_fast.py
@@ -22,13 +22,14 @@ import transformers
 from tokenizers import normalizers
 from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
 
+from modelscope.utils.constant import ModelFile
 from modelscope.utils.logger import get_logger
 from .tokenization_sbert import SbertTokenizer
 
 logger = get_logger(__name__)
 
 VOCAB_FILES_NAMES = {
-    'vocab_file': 'vocab.txt',
+    'vocab_file': ModelFile.VOCAB_FILE,
     'tokenizer_file': 'tokenizer.json'
 }
 
diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py
index 65578e6a..7665e8b7 100644
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -26,7 +26,7 @@ __all__ = [
 class OfaPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
@@ -97,13 +97,13 @@ class MPlugVisualQuestionAnsweringPreprocessor(Preprocessor):
 
         """
         from transformers import BertTokenizer
-        from modelscope.models.multi_modal.mplug import CONFIG_NAME, VOCAB_NAME, MPlugConfig
+        from modelscope.models.multi_modal.mplug import CONFIG_NAME, MPlugConfig
 
         super().__init__(*args, **kwargs)
 
         # tokenizer
         self.tokenizer = BertTokenizer.from_pretrained(
-            osp.join(model_dir, VOCAB_NAME))
+            osp.join(model_dir, ModelFile.VOCAB_FILE))
 
         # load configuration
         config = MPlugConfig.from_yaml_file(osp.join(model_dir, CONFIG_NAME))
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 8bf9943c..25576667 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -44,7 +44,7 @@ class Tokenize(Preprocessor):
 class SequenceClassificationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
@@ -291,7 +291,7 @@ class ZeroShotClassificationPreprocessor(NLPTokenizerPreprocessorBase):
     """
 
     def __init__(self, model_dir: str, mode=ModeKeys.INFERENCE, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
@@ -522,7 +522,7 @@ class NERPreprocessor(Preprocessor):
     """
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
@@ -614,7 +614,7 @@ class TextErrorCorrectionPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
         from fairseq.data import Dictionary
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data via the vocab file from the `model_dir` path
 
         Args:
             model_dir (str): model path
diff --git a/modelscope/preprocessors/ofa/base.py b/modelscope/preprocessors/ofa/base.py
index fb9d06cd..691f8b36 100644
--- a/modelscope/preprocessors/ofa/base.py
+++ b/modelscope/preprocessors/ofa/base.py
@@ -14,7 +14,7 @@ from .utils.random_help import set_torch_seed
 class OfaBasePreprocessor:
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/image_captioning.py b/modelscope/preprocessors/ofa/image_captioning.py
index 264c8e04..318a8a6d 100644
--- a/modelscope/preprocessors/ofa/image_captioning.py
+++ b/modelscope/preprocessors/ofa/image_captioning.py
@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
 class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/image_classification.py b/modelscope/preprocessors/ofa/image_classification.py
index 30289613..dd2de634 100644
--- a/modelscope/preprocessors/ofa/image_classification.py
+++ b/modelscope/preprocessors/ofa/image_classification.py
@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
 class OfaImageClassificationPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/summarization.py b/modelscope/preprocessors/ofa/summarization.py
index fd5113cd..99028e61 100644
--- a/modelscope/preprocessors/ofa/summarization.py
+++ b/modelscope/preprocessors/ofa/summarization.py
@@ -7,7 +7,7 @@ from .base import OfaBasePreprocessor
 class OfaSummarizationPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/text_classification.py b/modelscope/preprocessors/ofa/text_classification.py
index 1a3f84fd..5673a07f 100644
--- a/modelscope/preprocessors/ofa/text_classification.py
+++ b/modelscope/preprocessors/ofa/text_classification.py
@@ -7,7 +7,7 @@ from .base import OfaBasePreprocessor
 class OfaTextClassificationPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/text_to_image_synthesis.py b/modelscope/preprocessors/ofa/text_to_image_synthesis.py
index 9dbba921..938f50de 100644
--- a/modelscope/preprocessors/ofa/text_to_image_synthesis.py
+++ b/modelscope/preprocessors/ofa/text_to_image_synthesis.py
@@ -9,7 +9,7 @@ from .base import OfaBasePreprocessor
 class OfaTextToImageSynthesisPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
diff --git a/modelscope/preprocessors/ofa/visual_entailment.py b/modelscope/preprocessors/ofa/visual_entailment.py
index 72e88d75..6002c4a6 100644
--- a/modelscope/preprocessors/ofa/visual_entailment.py
+++ b/modelscope/preprocessors/ofa/visual_entailment.py
@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
 class OfaVisualEntailmentPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/visual_grounding.py b/modelscope/preprocessors/ofa/visual_grounding.py
index eebc4cf2..022e5788 100644
--- a/modelscope/preprocessors/ofa/visual_grounding.py
+++ b/modelscope/preprocessors/ofa/visual_grounding.py
@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
 class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/ofa/visual_question_answering.py b/modelscope/preprocessors/ofa/visual_question_answering.py
index b11af9f6..d34d1db0 100644
--- a/modelscope/preprocessors/ofa/visual_question_answering.py
+++ b/modelscope/preprocessors/ofa/visual_question_answering.py
@@ -12,7 +12,7 @@ from .base import OfaBasePreprocessor
 class OfaVisualQuestionAnsweringPreprocessor(OfaBasePreprocessor):
 
     def __init__(self, cfg, model_dir):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             cfg(modelscope.utils.config.ConfigDict) : model config
diff --git a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
index c7339538..e2602eaa 100644
--- a/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
+++ b/modelscope/preprocessors/space/dialog_intent_prediction_preprocessor.py
@@ -22,7 +22,7 @@ __all__ = ['DialogIntentPredictionPreprocessor']
 class DialogIntentPredictionPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
diff --git a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py
index 8ed97452..a2157c2b 100644
--- a/modelscope/preprocessors/space/dialog_modeling_preprocessor.py
+++ b/modelscope/preprocessors/space/dialog_modeling_preprocessor.py
@@ -20,7 +20,7 @@ __all__ = ['DialogModelingPreprocessor']
 class DialogModelingPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
diff --git a/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py b/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py
index 038ab09b..6eb17288 100644
--- a/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py
+++ b/modelscope/preprocessors/space/dialog_state_tracking_preprocessor.py
@@ -17,7 +17,7 @@ __all__ = ['DialogStateTrackingPreprocessor']
 class DialogStateTrackingPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
diff --git a/modelscope/preprocessors/space/fields/gen_field.py b/modelscope/preprocessors/space/fields/gen_field.py
index f924588c..5bff360f 100644
--- a/modelscope/preprocessors/space/fields/gen_field.py
+++ b/modelscope/preprocessors/space/fields/gen_field.py
@@ -8,6 +8,7 @@ from itertools import chain
 import numpy as np
 
 from modelscope.preprocessors.space.tokenizer import Tokenizer
+from modelscope.utils.constant import ModelFile
 from modelscope.utils.logger import get_logger
 from modelscope.utils.nlp.space import ontology, utils
 from modelscope.utils.nlp.space.db_ops import MultiWozDB
@@ -343,7 +344,7 @@ class MultiWOZBPETextField(BPETextField):
         ]
         special_tokens.extend(self.add_sepcial_tokens())
         self.tokenizer = Tokenizer(
-            vocab_path=os.path.join(model_dir, 'vocab.txt'),
+            vocab_path=os.path.join(model_dir, ModelFile.VOCAB_FILE),
             special_tokens=special_tokens,
             tokenizer_type=config.BPETextField.tokenizer_type)
         self.understand_ids = self.tokenizer.convert_tokens_to_ids(
diff --git a/modelscope/preprocessors/space/fields/intent_field.py b/modelscope/preprocessors/space/fields/intent_field.py
index 4ed7ab6c..dc00e677 100644
--- a/modelscope/preprocessors/space/fields/intent_field.py
+++ b/modelscope/preprocessors/space/fields/intent_field.py
@@ -14,6 +14,7 @@ import numpy as np
 from tqdm import tqdm
 
 from modelscope.preprocessors.space.tokenizer import Tokenizer
+from modelscope.utils.constant import ModelFile
 from modelscope.utils.nlp.space import ontology
 from modelscope.utils.nlp.space.scores import hierarchical_set_score
 from modelscope.utils.nlp.space.utils import list2np
@@ -50,7 +51,7 @@ class BPETextField(object):
         ]
         special_tokens.extend(self.add_sepcial_tokens())
         self.tokenizer = Tokenizer(
-            vocab_path=os.path.join(model_dir, 'vocab.txt'),
+            vocab_path=os.path.join(model_dir, ModelFile.VOCAB_FILE),
             special_tokens=special_tokens,
             tokenizer_type=config.BPETextField.tokenizer_type)
         self.understand_ids = self.numericalize(self.understand_tokens)
diff --git a/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py b/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py
index 2032dcf7..b5dd73a9 100644
--- a/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py
+++ b/modelscope/preprocessors/star/conversational_text_to_sql_preprocessor.py
@@ -28,7 +28,7 @@ __all__ = ['ConversationalTextToSqlPreprocessor']
 class ConversationalTextToSqlPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
+        """preprocess the data
 
         Args:
             model_dir (str): model path
diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index 5f327ddc..f2d69198 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -203,6 +203,8 @@ class ModelFile(object):
     TF_CKPT_PREFIX = 'ckpt-'
     TORCH_MODEL_FILE = 'pytorch_model.pt'
     TORCH_MODEL_BIN_FILE = 'pytorch_model.bin'
+    VOCAB_FILE = 'vocab.txt'
+    ONNX_MODEL_FILE = 'model.onnx'
     LABEL_MAPPING = 'label_mapping.json'