diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py
index 6f3ff5f5..3e962d85 100644
--- a/modelscope/pipelines/cv/image_matting_pipeline.py
+++ b/modelscope/pipelines/cv/image_matting_pipeline.py
@@ -7,7 +7,7 @@ import PIL
 
 from modelscope.pipelines.base import Input
 from modelscope.preprocessors import load_image
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import TF_GRAPH_FILE, Tasks
 from modelscope.utils.logger import get_logger
 from ..base import Pipeline
 from ..builder import PIPELINES
@@ -24,7 +24,7 @@ class ImageMattingPipeline(Pipeline):
         import tensorflow as tf
         if tf.__version__ >= '2.0':
             tf = tf.compat.v1
-        model_path = osp.join(self.model, 'matting_person.pb')
+        model_path = osp.join(self.model, TF_GRAPH_FILE)
 
         config = tf.ConfigProto(allow_soft_placement=True)
         config.gpu_options.allow_growth = True
diff --git a/modelscope/pydatasets/py_dataset.py b/modelscope/pydatasets/py_dataset.py
index 7d0edadb..78aedaa0 100644
--- a/modelscope/pydatasets/py_dataset.py
+++ b/modelscope/pydatasets/py_dataset.py
@@ -1,9 +1,9 @@
-import logging
 from typing import (Any, Callable, Dict, List, Mapping, Optional, Sequence,
                     Union)
 
 from datasets import Dataset, load_dataset
 
+from modelscope.utils.constant import Hubs
 from modelscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -41,17 +41,17 @@ class PyDataset:
         return dataset
 
     @staticmethod
-    def load(
-        path: Union[str, list],
-        target: Optional[str] = None,
-        version: Optional[str] = None,
-        name: Optional[str] = None,
-        split: Optional[str] = None,
-        data_dir: Optional[str] = None,
-        data_files: Optional[Union[str, Sequence[str],
-                                   Mapping[str, Union[str,
-                                                      Sequence[str]]]]] = None
-    ) -> 'PyDataset':
+    def load(path: Union[str, list],
+             target: Optional[str] = None,
+             version: Optional[str] = None,
+             name: Optional[str] = None,
+             split: Optional[str] = None,
+             data_dir: Optional[str] = None,
+             data_files: Optional[Union[str, Sequence[str],
+                                        Mapping[str,
+                                                Union[str,
+                                                      Sequence[str]]]]] = None,
+             hub: Optional[Hubs] = None) -> 'PyDataset':
         """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, URLs,
         or a local dataset.
         Args:
@@ -62,10 +62,15 @@ class PyDataset:
             data_dir (str, optional): Defining the data_dir of the dataset configuration.
             data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s).
             split (str, optional): Which split of the data to load.
+            hub (Hubs, optional): When loading from a remote hub, which hub to load from.
 
         Returns:
             PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset.
         """
+        if Hubs.modelscope == hub:
+            # TODO: parse data meta information from modelscope hub
+            # and possibly download data files to local (and update path)
+            print('getting data from modelscope hub')
        if isinstance(path, str):
            dataset = load_dataset(
                path,
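A minimal sketch of the new `hub` argument in action, mirroring the updated tests further down (network access to the Hugging Face hub is assumed; note the `Hubs.modelscope` branch is still a stub, per the TODO above):

```python
from modelscope.pydatasets import PyDataset
from modelscope.utils.constant import Hubs

# Explicitly route the load through the Hugging Face hub backend,
# exactly as the updated test_text_classification.py tests do.
dataset = PyDataset.load(
    'glue', name='sst2', target='sentence', hub=Hubs.huggingface)
```

Leaving `hub` unset keeps the previous behavior, so existing callers are unaffected.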
""" + if Hubs.modelscope == hub: + # TODO: parse data meta information from modelscope hub + # and possibly download data files to local (and update path) + print('getting data from modelscope hub') if isinstance(path, str): dataset = load_dataset( path, diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 41c9443b..9639daff 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -59,14 +59,30 @@ class Tasks(object): class InputFields(object): - """ Names for input data fileds in the input data for pipelines + """ Names for input data fields in the input data for pipelines """ img = 'img' text = 'text' audio = 'audio' +class Hubs(object): + """ Source from which an entity (such as a Dataset or Model) is stored + """ + modelscope = 'modelscope' + huggingface = 'huggingface' + + # configuration filename # in order to avoid conflict with huggingface # config file we use maas_config instead CONFIGFILE = 'maas_config.json' + +README_FILE = 'README.md' +TF_SAVED_MODEL_FILE = 'saved_model.pb' +TF_GRAPH_FILE = 'tf_graph.pb' +TF_CHECKPOINT_FOLDER = 'tf_ckpts' +TF_CHECKPOINT_FILE = 'checkpoint' +TORCH_MODEL_FILE = 'pytorch_model.bin' +TENSORFLOW = 'tensorflow' +PYTORCH = 'pytorch' diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 53006317..69195bd1 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -16,14 +16,15 @@ from modelscope.utils.hub import get_model_cache_dir class ImageMattingTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/image-matting-person' + self.model_id = 'damo/cv_unet_image-matting_damo' # switch to False if downloading everytime is not desired purge_cache = True if purge_cache: shutil.rmtree( get_model_cache_dir(self.model_id), ignore_errors=True) - def test_run(self): + @unittest.skip('deprecated, download model from model hub instead') + def test_run_with_direct_file_download(self): model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \ '.com/data/test/maas/image_matting/matting_person.pb' with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index 3e3faa1d..7f6dc77c 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -10,7 +10,7 @@ from modelscope.models.nlp import BertForSequenceClassification from modelscope.pipelines import SequenceClassificationPipeline, pipeline from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.pydatasets import PyDataset -from modelscope.utils.constant import Tasks +from modelscope.utils.constant import Hubs, Tasks from modelscope.utils.hub import get_model_cache_dir @@ -81,13 +81,15 @@ class SequenceClassificationTest(unittest.TestCase): text_classification = pipeline( task=Tasks.text_classification, model=self.model_id) result = text_classification( - PyDataset.load('glue', name='sst2', target='sentence')) + PyDataset.load( + 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) self.printDataset(result) def test_run_with_default_model(self): text_classification = pipeline(task=Tasks.text_classification) result = text_classification( - PyDataset.load('glue', name='sst2', target='sentence')) + PyDataset.load( + 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) self.printDataset(result) def test_run_with_dataset(self): @@ -97,9 +99,9 @@ class 
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index 3e3faa1d..7f6dc77c 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -10,7 +10,7 @@ from modelscope.models.nlp import BertForSequenceClassification
 from modelscope.pipelines import SequenceClassificationPipeline, pipeline
 from modelscope.preprocessors import SequenceClassificationPreprocessor
 from modelscope.pydatasets import PyDataset
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import Hubs, Tasks
 from modelscope.utils.hub import get_model_cache_dir
 
 
@@ -81,13 +81,15 @@ class SequenceClassificationTest(unittest.TestCase):
         text_classification = pipeline(
             task=Tasks.text_classification, model=self.model_id)
         result = text_classification(
-            PyDataset.load('glue', name='sst2', target='sentence'))
+            PyDataset.load(
+                'glue', name='sst2', target='sentence', hub=Hubs.huggingface))
         self.printDataset(result)
 
     def test_run_with_default_model(self):
         text_classification = pipeline(task=Tasks.text_classification)
         result = text_classification(
-            PyDataset.load('glue', name='sst2', target='sentence'))
+            PyDataset.load(
+                'glue', name='sst2', target='sentence', hub=Hubs.huggingface))
         self.printDataset(result)
 
     def test_run_with_dataset(self):
@@ -97,9 +99,9 @@ class SequenceClassificationTest(unittest.TestCase):
         text_classification = pipeline(
             Tasks.text_classification, model=model, preprocessor=preprocessor)
         # loaded from huggingface dataset
-        # TODO: add load_from parameter (an enum) LOAD_FROM.hugging_face
         # TODO: rename parameter as dataset_name and subset_name
-        dataset = PyDataset.load('glue', name='sst2', target='sentence')
+        dataset = PyDataset.load(
+            'glue', name='sst2', target='sentence', hub=Hubs.huggingface)
         result = text_classification(dataset)
         self.printDataset(result)
 
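Putting the pieces together, the end-to-end shape the updated tests exercise is a pipeline consuming a `PyDataset` pulled explicitly from the Hugging Face hub. A sketch assuming a default text-classification model is resolvable, as in `test_run_with_default_model`:

```python
from modelscope.pipelines import pipeline
from modelscope.pydatasets import PyDataset
from modelscope.utils.constant import Hubs, Tasks

# Build a text-classification pipeline with its default model, then feed it
# a dataset loaded with the new explicit hub argument.
text_classification = pipeline(task=Tasks.text_classification)
dataset = PyDataset.load(
    'glue', name='sst2', target='sentence', hub=Hubs.huggingface)
result = text_classification(dataset)
```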