diff --git a/.dev_scripts/build_docs.sh b/.dev_scripts/build_docs.sh
index 9c8acdf1..dc76e6f4 100644
--- a/.dev_scripts/build_docs.sh
+++ b/.dev_scripts/build_docs.sh
@@ -4,5 +4,5 @@ rm -rf build
 # update api rst
 #rm -rf source/api/
-#sphinx-apidoc --module-first -o source/api/ ../maas_lib/
+#sphinx-apidoc --module-first -o source/api/ ../modelscope/
 make html
diff --git a/.dev_scripts/linter.sh b/.dev_scripts/linter.sh
index fb8ab19d..6468e42b 100644
--- a/.dev_scripts/linter.sh
+++ b/.dev_scripts/linter.sh
@@ -1,3 +1,3 @@
-yapf -r -i maas_lib/ configs/ tests/ setup.py
-isort -rc maas_lib/ configs/ tests/ setup.py
-flake8 maas_lib/ configs/ tests/ setup.py
+yapf -r -i modelscope/ configs/ tests/ setup.py
+isort -rc modelscope/ configs/ tests/ setup.py
+flake8 modelscope/ configs/ tests/ setup.py
diff --git a/LICENSE b/LICENSE
index 85ed3d3a..14cec7de 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2022-2023 Alibaba MaaS. All rights reserved.
+Copyright 2022-2023 Alibaba ModelScope. All rights reserved.

 Apache License
 Version 2.0, January 2004
@@ -188,7 +188,7 @@ Copyright 2022-2023 Alibaba MaaS. All rights reserved.
 same "printed page" as the copyright notice for easier identification within
 third-party archives.

-   Copyright 2020-2022 Alibaba MaaS.
+   Copyright 2020-2022 Alibaba ModelScope.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/MANIFEST.in b/MANIFEST.in
index 0a153dba..665d7e90 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1 @@
-recursive-include maas_lib/configs *.py
+recursive-include modelscope/configs *.py
diff --git a/README.md b/README.md
index dabe8726..944c1f07 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Introduction

-MaaS library is targeted to support training, evaluation and inference for the state of the art models provided by Mind and further support third-party models provided by users outside alibaba.
+The ModelScope library supports training, evaluation and inference for state-of-the-art models provided by Mind, and further supports third-party models provided by users outside Alibaba.

 # Design doc
diff --git a/configs/README.md b/configs/README.md
index 94499da7..3c3b6963 100644
--- a/configs/README.md
+++ b/configs/README.md
@@ -1 +1 @@
-This folder will host example configs for each model supported by maas_lib.
+This folder will host example configs for each model supported by modelscope.
diff --git a/docs/source/api/maas_lib.fileio.format.rst b/docs/source/api/maas_lib.fileio.format.rst
deleted file mode 100644
index 7c2c649d..00000000
--- a/docs/source/api/maas_lib.fileio.format.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-maas\_lib.fileio.format package
-===============================
-
-.. automodule:: maas_lib.fileio.format
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Submodules
-----------
-
-maas\_lib.fileio.format.base module
------------------------------------
-
-.. automodule:: maas_lib.fileio.format.base
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-maas\_lib.fileio.format.json module
------------------------------------
-
-.. automodule:: maas_lib.fileio.format.json
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-maas\_lib.fileio.format.yaml module
------------------------------------
-
-..
automodule:: maas_lib.fileio.format.yaml - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.fileio.rst b/docs/source/api/maas_lib.fileio.rst deleted file mode 100644 index e9540208..00000000 --- a/docs/source/api/maas_lib.fileio.rst +++ /dev/null @@ -1,34 +0,0 @@ -maas\_lib.fileio package -======================== - -.. automodule:: maas_lib.fileio - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - maas_lib.fileio.format - -Submodules ----------- - -maas\_lib.fileio.file module ----------------------------- - -.. automodule:: maas_lib.fileio.file - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.fileio.io module --------------------------- - -.. automodule:: maas_lib.fileio.io - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.models.nlp.rst b/docs/source/api/maas_lib.models.nlp.rst deleted file mode 100644 index bd782ea8..00000000 --- a/docs/source/api/maas_lib.models.nlp.rst +++ /dev/null @@ -1,18 +0,0 @@ -maas\_lib.models.nlp package -============================ - -.. automodule:: maas_lib.models.nlp - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -maas\_lib.models.nlp.sequence\_classification\_model module ------------------------------------------------------------ - -.. automodule:: maas_lib.models.nlp.sequence_classification_model - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.models.rst b/docs/source/api/maas_lib.models.rst deleted file mode 100644 index 9e1874a3..00000000 --- a/docs/source/api/maas_lib.models.rst +++ /dev/null @@ -1,34 +0,0 @@ -maas\_lib.models package -======================== - -.. automodule:: maas_lib.models - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - maas_lib.models.nlp - -Submodules ----------- - -maas\_lib.models.base module ----------------------------- - -.. automodule:: maas_lib.models.base - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.models.builder module -------------------------------- - -.. automodule:: maas_lib.models.builder - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.pipelines.audio.rst b/docs/source/api/maas_lib.pipelines.audio.rst deleted file mode 100644 index 71e29b42..00000000 --- a/docs/source/api/maas_lib.pipelines.audio.rst +++ /dev/null @@ -1,7 +0,0 @@ -maas\_lib.pipelines.audio package -================================= - -.. automodule:: maas_lib.pipelines.audio - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.pipelines.cv.rst b/docs/source/api/maas_lib.pipelines.cv.rst deleted file mode 100644 index 938ebb5a..00000000 --- a/docs/source/api/maas_lib.pipelines.cv.rst +++ /dev/null @@ -1,18 +0,0 @@ -maas\_lib.pipelines.cv package -============================== - -.. automodule:: maas_lib.pipelines.cv - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -maas\_lib.pipelines.cv.image\_matting module --------------------------------------------- - -.. 
automodule:: maas_lib.pipelines.cv.image_matting - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.pipelines.multi_modal.rst b/docs/source/api/maas_lib.pipelines.multi_modal.rst deleted file mode 100644 index 74a7bf43..00000000 --- a/docs/source/api/maas_lib.pipelines.multi_modal.rst +++ /dev/null @@ -1,7 +0,0 @@ -maas\_lib.pipelines.multi\_modal package -======================================== - -.. automodule:: maas_lib.pipelines.multi_modal - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.preprocessors.rst b/docs/source/api/maas_lib.preprocessors.rst deleted file mode 100644 index 5f70e808..00000000 --- a/docs/source/api/maas_lib.preprocessors.rst +++ /dev/null @@ -1,50 +0,0 @@ -maas\_lib.preprocessors package -=============================== - -.. automodule:: maas_lib.preprocessors - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -maas\_lib.preprocessors.base module ------------------------------------ - -.. automodule:: maas_lib.preprocessors.base - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.preprocessors.builder module --------------------------------------- - -.. automodule:: maas_lib.preprocessors.builder - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.preprocessors.common module -------------------------------------- - -.. automodule:: maas_lib.preprocessors.common - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.preprocessors.image module ------------------------------------- - -.. automodule:: maas_lib.preprocessors.image - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.preprocessors.nlp module ----------------------------------- - -.. automodule:: maas_lib.preprocessors.nlp - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.rst b/docs/source/api/maas_lib.rst deleted file mode 100644 index 727b7986..00000000 --- a/docs/source/api/maas_lib.rst +++ /dev/null @@ -1,30 +0,0 @@ -maas\_lib package -================= - -.. automodule:: maas_lib - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - maas_lib.fileio - maas_lib.models - maas_lib.pipelines - maas_lib.preprocessors - maas_lib.utils - -Submodules ----------- - -maas\_lib.version module ------------------------- - -.. automodule:: maas_lib.version - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.trainers.nlp.rst b/docs/source/api/maas_lib.trainers.nlp.rst deleted file mode 100644 index 71f484ca..00000000 --- a/docs/source/api/maas_lib.trainers.nlp.rst +++ /dev/null @@ -1,18 +0,0 @@ -maas\_lib.trainers.nlp package -============================== - -.. automodule:: maas_lib.trainers.nlp - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -maas\_lib.trainers.nlp.sequence\_classification\_trainer module ---------------------------------------------------------------- - -.. automodule:: maas_lib.trainers.nlp.sequence_classification_trainer - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.trainers.rst b/docs/source/api/maas_lib.trainers.rst deleted file mode 100644 index eb90ee4f..00000000 --- a/docs/source/api/maas_lib.trainers.rst +++ /dev/null @@ -1,34 +0,0 @@ -maas\_lib.trainers package -========================== - -.. automodule:: maas_lib.trainers - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. 
toctree:: - :maxdepth: 4 - - maas_lib.trainers.nlp - -Submodules ----------- - -maas\_lib.trainers.base module ------------------------------- - -.. automodule:: maas_lib.trainers.base - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.trainers.builder module ---------------------------------- - -.. automodule:: maas_lib.trainers.builder - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/maas_lib.utils.rst b/docs/source/api/maas_lib.utils.rst deleted file mode 100644 index 17ead3eb..00000000 --- a/docs/source/api/maas_lib.utils.rst +++ /dev/null @@ -1,58 +0,0 @@ -maas\_lib.utils package -======================= - -.. automodule:: maas_lib.utils - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -maas\_lib.utils.config module ------------------------------ - -.. automodule:: maas_lib.utils.config - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.utils.constant module -------------------------------- - -.. automodule:: maas_lib.utils.constant - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.utils.logger module ------------------------------ - -.. automodule:: maas_lib.utils.logger - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.utils.pymod module ----------------------------- - -.. automodule:: maas_lib.utils.pymod - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.utils.registry module -------------------------------- - -.. automodule:: maas_lib.utils.registry - :members: - :undoc-members: - :show-inheritance: - -maas\_lib.utils.type\_assert module ------------------------------------ - -.. automodule:: maas_lib.utils.type_assert - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/modelscope.fileio.format.rst b/docs/source/api/modelscope.fileio.format.rst new file mode 100644 index 00000000..2c7b11de --- /dev/null +++ b/docs/source/api/modelscope.fileio.format.rst @@ -0,0 +1,34 @@ +modelscope.fileio.format package +================================ + +.. automodule:: modelscope.fileio.format + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.fileio.format.base module +------------------------------------ + +.. automodule:: modelscope.fileio.format.base + :members: + :undoc-members: + :show-inheritance: + +modelscope.fileio.format.json module +------------------------------------ + +.. automodule:: modelscope.fileio.format.json + :members: + :undoc-members: + :show-inheritance: + +modelscope.fileio.format.yaml module +------------------------------------ + +.. automodule:: modelscope.fileio.format.yaml + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.fileio.rst b/docs/source/api/modelscope.fileio.rst new file mode 100644 index 00000000..3f4ae1ca --- /dev/null +++ b/docs/source/api/modelscope.fileio.rst @@ -0,0 +1,34 @@ +modelscope.fileio package +========================= + +.. automodule:: modelscope.fileio + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.fileio.format + +Submodules +---------- + +modelscope.fileio.file module +----------------------------- + +.. automodule:: modelscope.fileio.file + :members: + :undoc-members: + :show-inheritance: + +modelscope.fileio.io module +--------------------------- + +.. 
automodule:: modelscope.fileio.io + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.models.cv.cartoon.facelib.LK.rst b/docs/source/api/modelscope.models.cv.cartoon.facelib.LK.rst new file mode 100644 index 00000000..848c7d67 --- /dev/null +++ b/docs/source/api/modelscope.models.cv.cartoon.facelib.LK.rst @@ -0,0 +1,18 @@ +modelscope.models.cv.cartoon.facelib.LK package +=============================================== + +.. automodule:: modelscope.models.cv.cartoon.facelib.LK + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.models.cv.cartoon.facelib.LK.lk module +------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.facelib.LK.lk + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.models.cv.cartoon.facelib.rst b/docs/source/api/modelscope.models.cv.cartoon.facelib.rst new file mode 100644 index 00000000..a81536b0 --- /dev/null +++ b/docs/source/api/modelscope.models.cv.cartoon.facelib.rst @@ -0,0 +1,50 @@ +modelscope.models.cv.cartoon.facelib package +============================================ + +.. automodule:: modelscope.models.cv.cartoon.facelib + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.models.cv.cartoon.facelib.LK + +Submodules +---------- + +modelscope.models.cv.cartoon.facelib.config module +-------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.facelib.config + :members: + :undoc-members: + :show-inheritance: + +modelscope.models.cv.cartoon.facelib.face\_detector module +---------------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.facelib.face_detector + :members: + :undoc-members: + :show-inheritance: + +modelscope.models.cv.cartoon.facelib.face\_landmark module +---------------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.facelib.face_landmark + :members: + :undoc-members: + :show-inheritance: + +modelscope.models.cv.cartoon.facelib.facer module +------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.facelib.facer + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.models.cv.cartoon.mtcnn_pytorch.rst b/docs/source/api/modelscope.models.cv.cartoon.mtcnn_pytorch.rst new file mode 100644 index 00000000..b5845af7 --- /dev/null +++ b/docs/source/api/modelscope.models.cv.cartoon.mtcnn_pytorch.rst @@ -0,0 +1,15 @@ +modelscope.models.cv.cartoon.mtcnn\_pytorch package +=================================================== + +.. automodule:: modelscope.models.cv.cartoon.mtcnn_pytorch + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.models.cv.cartoon.mtcnn_pytorch.src diff --git a/docs/source/api/modelscope.models.cv.cartoon.mtcnn_pytorch.src.rst b/docs/source/api/modelscope.models.cv.cartoon.mtcnn_pytorch.src.rst new file mode 100644 index 00000000..715cc292 --- /dev/null +++ b/docs/source/api/modelscope.models.cv.cartoon.mtcnn_pytorch.src.rst @@ -0,0 +1,26 @@ +modelscope.models.cv.cartoon.mtcnn\_pytorch.src package +======================================================= + +.. 
automodule:: modelscope.models.cv.cartoon.mtcnn_pytorch.src + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.models.cv.cartoon.mtcnn\_pytorch.src.align\_trans module +------------------------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans + :members: + :undoc-members: + :show-inheritance: + +modelscope.models.cv.cartoon.mtcnn\_pytorch.src.matlab\_cp2tform module +----------------------------------------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.mtcnn_pytorch.src.matlab_cp2tform + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.models.cv.cartoon.rst b/docs/source/api/modelscope.models.cv.cartoon.rst new file mode 100644 index 00000000..5a262e03 --- /dev/null +++ b/docs/source/api/modelscope.models.cv.cartoon.rst @@ -0,0 +1,27 @@ +modelscope.models.cv.cartoon package +==================================== + +.. automodule:: modelscope.models.cv.cartoon + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.models.cv.cartoon.facelib + modelscope.models.cv.cartoon.mtcnn_pytorch + +Submodules +---------- + +modelscope.models.cv.cartoon.utils module +----------------------------------------- + +.. automodule:: modelscope.models.cv.cartoon.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.models.cv.rst b/docs/source/api/modelscope.models.cv.rst new file mode 100644 index 00000000..47ce3916 --- /dev/null +++ b/docs/source/api/modelscope.models.cv.rst @@ -0,0 +1,15 @@ +modelscope.models.cv package +============================ + +.. automodule:: modelscope.models.cv + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.models.cv.cartoon diff --git a/docs/source/api/modelscope.models.nlp.rst b/docs/source/api/modelscope.models.nlp.rst new file mode 100644 index 00000000..f332aca8 --- /dev/null +++ b/docs/source/api/modelscope.models.nlp.rst @@ -0,0 +1,26 @@ +modelscope.models.nlp package +============================= + +.. automodule:: modelscope.models.nlp + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.models.nlp.sequence\_classification\_model module +------------------------------------------------------------ + +.. automodule:: modelscope.models.nlp.sequence_classification_model + :members: + :undoc-members: + :show-inheritance: + +modelscope.models.nlp.text\_generation\_model module +---------------------------------------------------- + +.. automodule:: modelscope.models.nlp.text_generation_model + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.models.rst b/docs/source/api/modelscope.models.rst new file mode 100644 index 00000000..8f2870b3 --- /dev/null +++ b/docs/source/api/modelscope.models.rst @@ -0,0 +1,35 @@ +modelscope.models package +========================= + +.. automodule:: modelscope.models + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.models.cv + modelscope.models.nlp + +Submodules +---------- + +modelscope.models.base module +----------------------------- + +.. automodule:: modelscope.models.base + :members: + :undoc-members: + :show-inheritance: + +modelscope.models.builder module +-------------------------------- + +.. 
automodule:: modelscope.models.builder + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.pipelines.audio.rst b/docs/source/api/modelscope.pipelines.audio.rst new file mode 100644 index 00000000..f162893f --- /dev/null +++ b/docs/source/api/modelscope.pipelines.audio.rst @@ -0,0 +1,7 @@ +modelscope.pipelines.audio package +================================== + +.. automodule:: modelscope.pipelines.audio + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.pipelines.cv.rst b/docs/source/api/modelscope.pipelines.cv.rst new file mode 100644 index 00000000..3f2da3f4 --- /dev/null +++ b/docs/source/api/modelscope.pipelines.cv.rst @@ -0,0 +1,26 @@ +modelscope.pipelines.cv package +=============================== + +.. automodule:: modelscope.pipelines.cv + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.pipelines.cv.image\_cartoon\_pipeline module +------------------------------------------------------- + +.. automodule:: modelscope.pipelines.cv.image_cartoon_pipeline + :members: + :undoc-members: + :show-inheritance: + +modelscope.pipelines.cv.image\_matting\_pipeline module +------------------------------------------------------- + +.. automodule:: modelscope.pipelines.cv.image_matting_pipeline + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.pipelines.multi_modal.rst b/docs/source/api/modelscope.pipelines.multi_modal.rst new file mode 100644 index 00000000..36df1c7c --- /dev/null +++ b/docs/source/api/modelscope.pipelines.multi_modal.rst @@ -0,0 +1,18 @@ +modelscope.pipelines.multi\_modal package +========================================= + +.. automodule:: modelscope.pipelines.multi_modal + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.pipelines.multi\_modal.image\_captioning module +---------------------------------------------------------- + +.. automodule:: modelscope.pipelines.multi_modal.image_captioning + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.pipelines.nlp.rst b/docs/source/api/modelscope.pipelines.nlp.rst new file mode 100644 index 00000000..836d914f --- /dev/null +++ b/docs/source/api/modelscope.pipelines.nlp.rst @@ -0,0 +1,26 @@ +modelscope.pipelines.nlp package +================================ + +.. automodule:: modelscope.pipelines.nlp + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.pipelines.nlp.sequence\_classification\_pipeline module +------------------------------------------------------------------ + +.. automodule:: modelscope.pipelines.nlp.sequence_classification_pipeline + :members: + :undoc-members: + :show-inheritance: + +modelscope.pipelines.nlp.text\_generation\_pipeline module +---------------------------------------------------------- + +.. automodule:: modelscope.pipelines.nlp.text_generation_pipeline + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.pipelines.rst b/docs/source/api/modelscope.pipelines.rst new file mode 100644 index 00000000..167b5cd3 --- /dev/null +++ b/docs/source/api/modelscope.pipelines.rst @@ -0,0 +1,53 @@ +modelscope.pipelines package +============================ + +.. automodule:: modelscope.pipelines + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. 
toctree:: + :maxdepth: 4 + + modelscope.pipelines.audio + modelscope.pipelines.cv + modelscope.pipelines.multi_modal + modelscope.pipelines.nlp + +Submodules +---------- + +modelscope.pipelines.base module +-------------------------------- + +.. automodule:: modelscope.pipelines.base + :members: + :undoc-members: + :show-inheritance: + +modelscope.pipelines.builder module +----------------------------------- + +.. automodule:: modelscope.pipelines.builder + :members: + :undoc-members: + :show-inheritance: + +modelscope.pipelines.default module +----------------------------------- + +.. automodule:: modelscope.pipelines.default + :members: + :undoc-members: + :show-inheritance: + +modelscope.pipelines.util module +-------------------------------- + +.. automodule:: modelscope.pipelines.util + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.preprocessors.rst b/docs/source/api/modelscope.preprocessors.rst new file mode 100644 index 00000000..b555198d --- /dev/null +++ b/docs/source/api/modelscope.preprocessors.rst @@ -0,0 +1,50 @@ +modelscope.preprocessors package +================================ + +.. automodule:: modelscope.preprocessors + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.preprocessors.base module +------------------------------------ + +.. automodule:: modelscope.preprocessors.base + :members: + :undoc-members: + :show-inheritance: + +modelscope.preprocessors.builder module +--------------------------------------- + +.. automodule:: modelscope.preprocessors.builder + :members: + :undoc-members: + :show-inheritance: + +modelscope.preprocessors.common module +-------------------------------------- + +.. automodule:: modelscope.preprocessors.common + :members: + :undoc-members: + :show-inheritance: + +modelscope.preprocessors.image module +------------------------------------- + +.. automodule:: modelscope.preprocessors.image + :members: + :undoc-members: + :show-inheritance: + +modelscope.preprocessors.nlp module +----------------------------------- + +.. automodule:: modelscope.preprocessors.nlp + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.pydatasets.rst b/docs/source/api/modelscope.pydatasets.rst new file mode 100644 index 00000000..2508a91f --- /dev/null +++ b/docs/source/api/modelscope.pydatasets.rst @@ -0,0 +1,18 @@ +modelscope.pydatasets package +============================= + +.. automodule:: modelscope.pydatasets + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.pydatasets.py\_dataset module +---------------------------------------- + +.. automodule:: modelscope.pydatasets.py_dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/modelscope.rst b/docs/source/api/modelscope.rst new file mode 100644 index 00000000..efab568b --- /dev/null +++ b/docs/source/api/modelscope.rst @@ -0,0 +1,32 @@ +modelscope package +================== + +.. automodule:: modelscope + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + modelscope.fileio + modelscope.models + modelscope.pipelines + modelscope.preprocessors + modelscope.pydatasets + modelscope.trainers + modelscope.utils + +Submodules +---------- + +modelscope.version module +------------------------- + +.. 
automodule:: modelscope.version + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/maas_lib.pipelines.nlp.rst b/docs/source/api/modelscope.trainers.nlp.rst similarity index 52% rename from docs/source/api/maas_lib.pipelines.nlp.rst rename to docs/source/api/modelscope.trainers.nlp.rst index d41c09ad..4bc2f875 100644 --- a/docs/source/api/maas_lib.pipelines.nlp.rst +++ b/docs/source/api/modelscope.trainers.nlp.rst @@ -1,7 +1,7 @@ -maas\_lib.pipelines.nlp package +modelscope.trainers.nlp package =============================== -.. automodule:: maas_lib.pipelines.nlp +.. automodule:: modelscope.trainers.nlp :members: :undoc-members: :show-inheritance: @@ -9,10 +9,10 @@ maas\_lib.pipelines.nlp package Submodules ---------- -maas\_lib.pipelines.nlp.sequence\_classification\_pipeline module ------------------------------------------------------------------ +modelscope.trainers.nlp.sequence\_classification\_trainer module +---------------------------------------------------------------- -.. automodule:: maas_lib.pipelines.nlp.sequence_classification_pipeline +.. automodule:: modelscope.trainers.nlp.sequence_classification_trainer :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/maas_lib.pipelines.rst b/docs/source/api/modelscope.trainers.rst similarity index 53% rename from docs/source/api/maas_lib.pipelines.rst rename to docs/source/api/modelscope.trainers.rst index 40b82adc..aac4fb99 100644 --- a/docs/source/api/maas_lib.pipelines.rst +++ b/docs/source/api/modelscope.trainers.rst @@ -1,7 +1,7 @@ -maas\_lib.pipelines package +modelscope.trainers package =========================== -.. automodule:: maas_lib.pipelines +.. automodule:: modelscope.trainers :members: :undoc-members: :show-inheritance: @@ -12,25 +12,23 @@ Subpackages .. toctree:: :maxdepth: 4 - maas_lib.pipelines.cv - maas_lib.pipelines.multi_modal - maas_lib.pipelines.nlp + modelscope.trainers.nlp Submodules ---------- -maas\_lib.pipelines.base module +modelscope.trainers.base module ------------------------------- -.. automodule:: maas_lib.pipelines.base +.. automodule:: modelscope.trainers.base :members: :undoc-members: :show-inheritance: -maas\_lib.pipelines.builder module +modelscope.trainers.builder module ---------------------------------- -.. automodule:: maas_lib.pipelines.builder +.. automodule:: modelscope.trainers.builder :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/modelscope.utils.rst b/docs/source/api/modelscope.utils.rst new file mode 100644 index 00000000..0a78d4f4 --- /dev/null +++ b/docs/source/api/modelscope.utils.rst @@ -0,0 +1,66 @@ +modelscope.utils package +======================== + +.. automodule:: modelscope.utils + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +modelscope.utils.config module +------------------------------ + +.. automodule:: modelscope.utils.config + :members: + :undoc-members: + :show-inheritance: + +modelscope.utils.constant module +-------------------------------- + +.. automodule:: modelscope.utils.constant + :members: + :undoc-members: + :show-inheritance: + +modelscope.utils.hub module +--------------------------- + +.. automodule:: modelscope.utils.hub + :members: + :undoc-members: + :show-inheritance: + +modelscope.utils.logger module +------------------------------ + +.. automodule:: modelscope.utils.logger + :members: + :undoc-members: + :show-inheritance: + +modelscope.utils.pymod module +----------------------------- + +.. 
automodule:: modelscope.utils.pymod
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+modelscope.utils.registry module
+--------------------------------
+
+.. automodule:: modelscope.utils.registry
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+modelscope.utils.type\_assert module
+------------------------------------
+
+.. automodule:: modelscope.utils.type_assert
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/api/modules.rst b/docs/source/api/modules.rst
index 84eecc70..0f83e90c 100644
--- a/docs/source/api/modules.rst
+++ b/docs/source/api/modules.rst
@@ -1,7 +1,7 @@
-maas_lib
-========
+modelscope
+==========

 .. toctree::
    :maxdepth: 4

-   maas_lib
+   modelscope
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4cdcd956..2c2a0017 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -18,10 +18,10 @@ import sphinx_rtd_theme
 sys.path.insert(0, os.path.abspath('../../'))

 # -- Project information -----------------------------------------------------
-project = 'maas_lib'
-copyright = '2022-2023, Alibaba MaaS'
-author = 'maas_lib Authors'
-version_file = '../../maas_lib/version.py'
+project = 'modelscope'
+copyright = '2022-2023, Alibaba ModelScope'
+author = 'modelscope Authors'
+version_file = '../../modelscope/version.py'

 def get_version():
@@ -88,7 +88,7 @@ html_static_path = ['_static']
 # -- Options for HTMLHelp output ---------------------------------------------
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'maas_lib_doc'
+htmlhelp_basename = 'modelscope_doc'

 # -- Extension configuration -------------------------------------------------
 # Ignore >>> when copying code
diff --git a/docs/source/develop.md b/docs/source/develop.md
index 4d0812ae..c048bef7 100644
--- a/docs/source/develop.md
+++ b/docs/source/develop.md
@@ -10,39 +10,86 @@ We use the following tools for linting and formatting:
 Style configurations of yapf and isort can be found in [setup.cfg](../../setup.cfg).
 We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `seed-isort-config`, `isort`, `trailing whitespaces`,
- fixes `end-of-files`, sorts `requirments.txt` automatically on every commit.
- The config for a pre-commit hook is stored in [.pre-commit-config](../../.pre-commit-config.yaml).
- After you clone the repository, you will need to install initialize pre-commit hook.
- ```bash
- pip install -r requirements/tests.txt
- ```
- From the repository folder
- ```bash
- pre-commit install
- ```
-
- After this on every commit check code linters and formatter will be enforced.
-
- If you want to use pre-commit to check all the files, you can run
- ```bash
- pre-commit run --all-files
- ```
-
- If you only want to format and lint your code, you can run
- ```bash
- make linter
- ```
-
- ## 2. Test
- ### 2.1 Unit test
- ```bash
- make test
- ```
-
- ### 2.2 Test data
- TODO
-
- ## 3. Build pip package
- ```bash
- make whl
- ```
+fixes `end-of-files`, sorts `requirements.txt` automatically on every commit.
+The config for a pre-commit hook is stored in [.pre-commit-config](../../.pre-commit-config.yaml).
+After you clone the repository, you will need to install and initialize the pre-commit hook.
+```bash
+pip install -r requirements/tests.txt
+```
+From the repository folder
+```bash
+pre-commit install
+```
+
+After this, code linters and formatters will be enforced on every commit.
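+
+For example, once the hook is installed, the same checks can also be triggered by hand against whatever is currently staged (a quick sanity check; `modelscope/some_file.py` below is just a placeholder):
+```bash
+git add modelscope/some_file.py
+pre-commit run
+```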
+
+If you want to use pre-commit to check all the files, you can run
+```bash
+pre-commit run --all-files
+```
+
+If you only want to format and lint your code, you can run
+```bash
+make linter
+```
+
+## 2. Test
+### 2.1 Unit test
+```bash
+make test
+```
+
+### 2.2 Test data
+TODO
+
+## 3. Code Review
+
+1. Run the following command to create an Aone CR, replacing `TARGET_BRANCH` and `CR_NAME` with the ones you want.
+   ```shell
+   git push origin HEAD:refs/for/TARGET_BRANCH/CR_NAME
+   ```
+
+   Please refer to [https://yuque.antfin.com/aone/platform/lcg8yr](https://yuque.antfin.com/aone/platform/lcg8yr) for more details.
+
+   The following output is expected.
+   ```shell
+   Counting objects: 5, done.
+   Delta compression using up to 96 threads.
+   Compressing objects: 100% (5/5), done.
+   Writing objects: 100% (5/5), 543 bytes | 0 bytes/s, done.
+   Total 5 (delta 4), reused 0 (delta 0)
+   remote: +------------------------------------------------------------------------+
+   remote: | Merge Request #8949062 was created or updated.                         |
+   remote: | View merge request at URL:                                             |
+   remote: | https://code.aone.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8949062 |
+   remote: +------------------------------------------------------------------------+
+   To git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git
+    * [new branch]          HEAD -> refs/for/master/support_kwargs_pipeline
+   ```
+
+2. Open the remote URL `https://code.aone.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/ID` and edit the title of the CR in the following format before merging your code:
+   * Feature
+     ```shell
+     [to #AONE_ID] feat: commit title
+
+     Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8949062
+
+     * commit msg1
+     * commit msg2
+     ```
+   * Bugfix
+     ```shell
+     [to #AONE_ID] fix: commit title
+
+     Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8949062
+
+     * commit msg1
+     * commit msg2
+     ```
+
+## 4. Build pip package
+```bash
+make whl
+```
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 0ca63b41..3b223531 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,11 +1,11 @@
-.. maas_lib documentation file,
+.. modelscope documentation file,
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

-MaasLib DOCUMENTATION
+ModelScope DOCUMENTATION
=======================================

-MaasLib doc
+ModelScope doc

..
toctree::
   :maxdepth: 2
@@ -30,11 +30,11 @@ MaasLib doc
   :maxdepth: 10
   :caption: API Doc

-   api/maas_lib.preprocessors
-   api/maas_lib.models
-   api/maas_lib.pipelines
-   api/maas_lib.fileio
-   api/maas_lib.utils
+   api/modelscope.preprocessors
+   api/modelscope.models
+   api/modelscope.pipelines
+   api/modelscope.fileio
+   api/modelscope.utils

 Indices and tables
diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md
index 3c961097..0f4cbbc3 100644
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -5,39 +5,39 @@
 After installation, run the following commands to create a dedicated Python environment for the maas library.
 ```shell
-conda create -n maas python=3.6
-conda activate maas
+conda create -n modelscope python=3.6
+conda activate modelscope
 ```
 Check that the python and pip commands have switched to the conda environment.
 ```shell
 which python
-# ~/workspace/anaconda3/envs/maas/bin/python
+# ~/workspace/anaconda3/envs/modelscope/bin/python
 which pip
-# ~/workspace/anaconda3/envs/maas/bin/pip
+# ~/workspace/anaconda3/envs/modelscope/bin/pip
 ```
 Note: this project only supports `python3`; do not use a python2 environment.

 ## Installing third-party dependencies
-The MaaS Library currently supports the two major deep learning frameworks, TensorFlow and PyTorch, for model training and inference. It has been tested on Python 3.6+, PyTorch 1.8+ and TensorFlow 2.6; users can install the framework required by their chosen model via the links below:
+The ModelScope Library currently supports the two major deep learning frameworks, TensorFlow and PyTorch, for model training and inference. It has been tested on Python 3.6+, PyTorch 1.8+ and TensorFlow 2.6; users can install the framework required by their chosen model via the links below:

 * [PyTorch installation guide](https://pytorch.org/get-started/locally/)
 * [TensorFlow installation guide](https://www.tensorflow.org/install/pip)

-## Installing the MaaS library
+## Installing the ModelScope library
 Note: if you hit an error during installation, check the [FAQ](faq.md) for a solution.

 ### Install with pip
 ```shell
-pip install -r http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/maas.txt
+pip install -r http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/modelscope.txt
 ```

 After installation succeeds, run the following command to verify the installation:
 ```shell
-python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting',model='damo/image-matting-person')('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'))"
+python -c "from modelscope.pipelines import pipeline;print(pipeline('image-matting',model='damo/image-matting-person')('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'))"
 ```

@@ -45,11 +45,11 @@
 Suitable for local development and debugging; changes to the source take effect directly.
 ```shell
-git clone git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git maaslib
+git clone git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git modelscope
 git fetch origin master
 git checkout master

-cd maaslib
+cd modelscope

 # install dependencies
 pip install -r requirements.txt
@@ -60,7 +60,7 @@ export PYTHONPATH=`pwd`

 After installation succeeds, run the following command to verify the installation:
 ```shell
-python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting',model='damo/image-matting-person')('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'))"
+python -c "from modelscope.pipelines import pipeline;print(pipeline('image-matting',model='damo/image-matting-person')('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'))"
 ```

@@ -79,8 +79,8 @@ The pipeline function provides a concise inference interface; an example follows, and the pipeline tutorial covers it in more detail.
 ```python
 import cv2
 import os.path as osp
-from maas_lib.pipelines import pipeline
-from maas_lib.utils.constant import Tasks
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks

 # create a pipeline from the task name
 img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
@@ -95,12 +95,13 @@
 print(f'Output written to {osp.abspath("result.png")}')
 ```

 In addition, the pipeline interface can also take a Dataset as input; the code above can equally be written as
+
 ```python
 import cv2
 import os.path as osp
-from maas_lib.pipelines import pipeline
-from maas_lib.utils.constant import Tasks
-from ali_maas_datasets import PyDataset
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.pydatasets import PyDataset

 # build a PyDataset from image URLs; a local folder can also be used via input_location = '/dir/to/images'
 input_location = [
diff --git a/docs/source/tutorials/pipeline.md b/docs/source/tutorials/pipeline.md
index 512e64ee..cc851278 100644
--- a/docs/source/tutorials/pipeline.md
+++ b/docs/source/tutorials/pipeline.md
@@ -19,7 +19,7 @@
 1. The pipeline function supports specifying a task name, loading the task's default model, and creating the corresponding Pipeline object.
    Run the following Python code:
    ```python
-   >>> from maas_lib.pipelines import pipeline
+   >>> from modelscope.pipelines import pipeline
    >>> img_matting = pipeline(task='image-matting', model='damo/image-matting-person')
    ```
@@ -65,8 +65,8 @@ wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/easynlp_modelz
 Create the tokenizer and the model
 ```python
->>> from maas_lib.models import Model
->>> from maas_lib.preprocessors import SequenceClassificationPreprocessor
+>>> from modelscope.models import Model
+>>> from modelscope.preprocessors import SequenceClassificationPreprocessor
 >>> model = Model.from_pretrained('damo/bert-base-sst2')
 >>> tokenizer = SequenceClassificationPreprocessor(
     model.model_dir, first_sequence='sentence', second_sequence=None)
 ```
 Create a pipeline from the tokenizer and model objects
 ```python
->>> from maas_lib.pipelines import pipeline
+>>> from modelscope.pipelines import pipeline
 >>> semantic_cls = pipeline('text-classification', model=model, preprocessor=tokenizer)
 >>> semantic_cls("Hello world!")
 ```
diff --git a/maas_lib/pipelines/builder.py b/maas_lib/pipelines/builder.py
deleted file mode 100644
index 703dd33f..00000000
--- a/maas_lib/pipelines/builder.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-import os.path as osp
-from typing import Union
-
-import json
-from maas_hub.file_download import model_file_download
-
-from maas_lib.models.base import Model
-from maas_lib.utils.config import Config, ConfigDict
-from maas_lib.utils.constant import CONFIGFILE, Tasks
-from maas_lib.utils.registry import Registry, build_from_cfg
-from .base import Pipeline
-from .util import is_model_name
-
-PIPELINES = Registry('pipelines')
-
-
-def build_pipeline(cfg: ConfigDict,
-                   task_name: str = None,
-                   default_args: dict = None):
-    """ build pipeline given model config dict.
-
-    Args:
-        cfg (:obj:`ConfigDict`): config dict for model object.
-        task_name (str, optional): task name, refer to
-            :obj:`Tasks` for more details.
-        default_args (dict, optional): Default initialization arguments.
-    """
-    return build_from_cfg(
-        cfg, PIPELINES, group_key=task_name, default_args=default_args)
-
-
-def pipeline(task: str = None,
-             model: Union[str, Model] = None,
-             preprocessor=None,
-             config_file: str = None,
-             pipeline_name: str = None,
-             framework: str = None,
-             device: int = -1,
-             **kwargs) -> Pipeline:
-    """ Factory method to build a obj:`Pipeline`.
-
-
-    Args:
-        task (str): Task name defining which pipeline will be returned.
-        model (str or obj:`Model`): model name or model object.
-        preprocessor: preprocessor object.
-        config_file (str, optional): path to config file.
-        pipeline_name (str, optional): pipeline class name or alias name.
-        framework (str, optional): framework type.
- device (int, optional): which device is used to do inference. - - Return: - pipeline (obj:`Pipeline`): pipeline object for certain task. - - Examples: - ```python - >>> p = pipeline('image-classification') - >>> p = pipeline('text-classification', model='distilbert-base-uncased') - >>> # Using model object - >>> resnet = Model.from_pretrained('Resnet') - >>> p = pipeline('image-classification', model=resnet) - """ - if task is None and pipeline_name is None: - raise ValueError('task or pipeline_name is required') - - if pipeline_name is None: - # get default pipeline for this task - assert task in PIPELINES.modules, f'No pipeline is registerd for Task {task}' - pipeline_name = get_default_pipeline(task) - - cfg = ConfigDict(type=pipeline_name) - - if model: - assert isinstance(model, (str, Model)), \ - f'model should be either str or Model, but got {type(model)}' - cfg.model = model - - if preprocessor is not None: - cfg.preprocessor = preprocessor - - return build_pipeline(cfg, task_name=task) - - -def get_default_pipeline(task): - return list(PIPELINES.modules[task].keys())[0] diff --git a/maas_lib/pipelines/cv/__init__.py b/maas_lib/pipelines/cv/__init__.py deleted file mode 100644 index 79548682..00000000 --- a/maas_lib/pipelines/cv/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .image_matting import ImageMatting diff --git a/maas_lib/pipelines/util.py b/maas_lib/pipelines/util.py deleted file mode 100644 index 3e907359..00000000 --- a/maas_lib/pipelines/util.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os.path as osp - -import json -from maas_hub.file_download import model_file_download - -from maas_lib.utils.constant import CONFIGFILE - - -def is_model_name(model): - if osp.exists(model): - if osp.exists(osp.join(model, CONFIGFILE)): - return True - else: - return False - else: - # try: - # cfg_file = model_file_download(model, CONFIGFILE) - # except Exception: - # cfg_file = None - # TODO @wenmeng.zwm use exception instead of - # following tricky logic - cfg_file = model_file_download(model, CONFIGFILE) - with open(cfg_file, 'r') as infile: - cfg = json.load(infile) - if 'Code' in cfg: - return False - else: - return True diff --git a/maas_lib/__init__.py b/modelscope/__init__.py similarity index 100% rename from maas_lib/__init__.py rename to modelscope/__init__.py diff --git a/maas_lib/fileio/__init__.py b/modelscope/fileio/__init__.py similarity index 100% rename from maas_lib/fileio/__init__.py rename to modelscope/fileio/__init__.py diff --git a/maas_lib/fileio/file.py b/modelscope/fileio/file.py similarity index 100% rename from maas_lib/fileio/file.py rename to modelscope/fileio/file.py diff --git a/maas_lib/fileio/format/__init__.py b/modelscope/fileio/format/__init__.py similarity index 100% rename from maas_lib/fileio/format/__init__.py rename to modelscope/fileio/format/__init__.py diff --git a/maas_lib/fileio/format/base.py b/modelscope/fileio/format/base.py similarity index 100% rename from maas_lib/fileio/format/base.py rename to modelscope/fileio/format/base.py diff --git a/maas_lib/fileio/format/json.py b/modelscope/fileio/format/json.py similarity index 100% rename from maas_lib/fileio/format/json.py rename to modelscope/fileio/format/json.py diff --git a/maas_lib/fileio/format/yaml.py b/modelscope/fileio/format/yaml.py similarity index 100% rename from maas_lib/fileio/format/yaml.py rename to modelscope/fileio/format/yaml.py diff --git a/maas_lib/fileio/io.py b/modelscope/fileio/io.py similarity index 100% rename 
from maas_lib/fileio/io.py rename to modelscope/fileio/io.py diff --git a/maas_lib/models/__init__.py b/modelscope/models/__init__.py similarity index 71% rename from maas_lib/models/__init__.py rename to modelscope/models/__init__.py index aa1b3f14..170e525e 100644 --- a/maas_lib/models/__init__.py +++ b/modelscope/models/__init__.py @@ -2,4 +2,4 @@ from .base import Model from .builder import MODELS, build_model -from .nlp import SequenceClassificationModel +from .nlp import BertForSequenceClassification diff --git a/maas_lib/models/base.py b/modelscope/models/base.py similarity index 78% rename from maas_lib/models/base.py rename to modelscope/models/base.py index cc6c4ec8..e641236d 100644 --- a/maas_lib/models/base.py +++ b/modelscope/models/base.py @@ -7,9 +7,10 @@ from typing import Dict, List, Tuple, Union from maas_hub.file_download import model_file_download from maas_hub.snapshot_download import snapshot_download -from maas_lib.models.builder import build_model -from maas_lib.utils.config import Config -from maas_lib.utils.constant import CONFIGFILE +from modelscope.models.builder import build_model +from modelscope.utils.config import Config +from modelscope.utils.constant import CONFIGFILE +from modelscope.utils.hub import get_model_cache_dir Tensor = Union['torch.Tensor', 'tf.Tensor'] @@ -39,8 +40,9 @@ class Model(ABC): if osp.exists(model_name_or_path): local_model_dir = model_name_or_path else: - - local_model_dir = snapshot_download(model_name_or_path) + cache_path = get_model_cache_dir(model_name_or_path) + local_model_dir = cache_path if osp.exists( + cache_path) else snapshot_download(model_name_or_path) # else: # raise ValueError( # 'Remote model repo {model_name_or_path} does not exists') @@ -48,7 +50,7 @@ class Model(ABC): cfg = Config.from_file(osp.join(local_model_dir, CONFIGFILE)) task_name = cfg.task model_cfg = cfg.model - # TODO @wenmeng.zwm may should mannually initialize model after model building + # TODO @wenmeng.zwm may should manually initialize model after model building if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): model_cfg.type = model_cfg.model_type model_cfg.model_dir = local_model_dir diff --git a/maas_lib/models/builder.py b/modelscope/models/builder.py similarity index 84% rename from maas_lib/models/builder.py rename to modelscope/models/builder.py index 1e52d271..b6df8c90 100644 --- a/maas_lib/models/builder.py +++ b/modelscope/models/builder.py @@ -1,7 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
-from maas_lib.utils.config import ConfigDict -from maas_lib.utils.registry import Registry, build_from_cfg +from modelscope.utils.config import ConfigDict +from modelscope.utils.registry import Registry, build_from_cfg MODELS = Registry('models') diff --git a/maas_lib/models/nlp/space/__init__.py b/modelscope/models/cv/__init__.py similarity index 100% rename from maas_lib/models/nlp/space/__init__.py rename to modelscope/models/cv/__init__.py diff --git a/maas_lib/models/nlp/space/modules/__init__.py b/modelscope/models/cv/cartoon/__init__.py similarity index 100% rename from maas_lib/models/nlp/space/modules/__init__.py rename to modelscope/models/cv/cartoon/__init__.py diff --git a/modelscope/models/cv/cartoon/facelib/LICENSE b/modelscope/models/cv/cartoon/facelib/LICENSE new file mode 100644 index 00000000..8e497ab8 --- /dev/null +++ b/modelscope/models/cv/cartoon/facelib/LICENSE @@ -0,0 +1,4 @@ + +Copyright (c) Peppa_Pig_Face_Engine + +https://github.com/610265158/Peppa_Pig_Face_Engine diff --git a/maas_lib/pipelines/audio/__init__.py b/modelscope/models/cv/cartoon/facelib/LK/__init__.py similarity index 100% rename from maas_lib/pipelines/audio/__init__.py rename to modelscope/models/cv/cartoon/facelib/LK/__init__.py diff --git a/modelscope/models/cv/cartoon/facelib/LK/lk.py b/modelscope/models/cv/cartoon/facelib/LK/lk.py new file mode 100644 index 00000000..de7c6ced --- /dev/null +++ b/modelscope/models/cv/cartoon/facelib/LK/lk.py @@ -0,0 +1,97 @@ +import numpy as np + +from ..config import config as cfg + + +class GroupTrack(): + + def __init__(self): + self.old_frame = None + self.previous_landmarks_set = None + self.with_landmark = True + self.thres = cfg.TRACE.pixel_thres + self.alpha = cfg.TRACE.smooth_landmark + self.iou_thres = cfg.TRACE.iou_thres + + def calculate(self, img, current_landmarks_set): + if self.previous_landmarks_set is None: + self.previous_landmarks_set = current_landmarks_set + result = current_landmarks_set + else: + previous_lm_num = self.previous_landmarks_set.shape[0] + if previous_lm_num == 0: + self.previous_landmarks_set = current_landmarks_set + result = current_landmarks_set + return result + else: + result = [] + for i in range(current_landmarks_set.shape[0]): + not_in_flag = True + for j in range(previous_lm_num): + if self.iou(current_landmarks_set[i], + self.previous_landmarks_set[j] + ) > self.iou_thres: + result.append( + self.smooth(current_landmarks_set[i], + self.previous_landmarks_set[j])) + not_in_flag = False + break + if not_in_flag: + result.append(current_landmarks_set[i]) + + result = np.array(result) + self.previous_landmarks_set = result + + return result + + def iou(self, p_set0, p_set1): + rec1 = [ + np.min(p_set0[:, 0]), + np.min(p_set0[:, 1]), + np.max(p_set0[:, 0]), + np.max(p_set0[:, 1]) + ] + rec2 = [ + np.min(p_set1[:, 0]), + np.min(p_set1[:, 1]), + np.max(p_set1[:, 0]), + np.max(p_set1[:, 1]) + ] + + # computing area of each rectangles + S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1]) + S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1]) + + # computing the sum_area + sum_area = S_rec1 + S_rec2 + + # find the each edge of intersect rectangle + x1 = max(rec1[0], rec2[0]) + y1 = max(rec1[1], rec2[1]) + x2 = min(rec1[2], rec2[2]) + y2 = min(rec1[3], rec2[3]) + + # judge if there is an intersect + intersect = max(0, x2 - x1) * max(0, y2 - y1) + + iou = intersect / (sum_area - intersect) + return iou + + def smooth(self, now_landmarks, previous_landmarks): + result = [] + for i in range(now_landmarks.shape[0]): + x = 
now_landmarks[i][0] - previous_landmarks[i][0] + y = now_landmarks[i][1] - previous_landmarks[i][1] + dis = np.sqrt(np.square(x) + np.square(y)) + if dis < self.thres: + result.append(previous_landmarks[i]) + else: + result.append( + self.do_moving_average(now_landmarks[i], + previous_landmarks[i])) + + return np.array(result) + + def do_moving_average(self, p_now, p_previous): + p = self.alpha * p_now + (1 - self.alpha) * p_previous + return p diff --git a/maas_lib/pipelines/multi_modal/__init__.py b/modelscope/models/cv/cartoon/facelib/__init__.py similarity index 100% rename from maas_lib/pipelines/multi_modal/__init__.py rename to modelscope/models/cv/cartoon/facelib/__init__.py diff --git a/modelscope/models/cv/cartoon/facelib/config.py b/modelscope/models/cv/cartoon/facelib/config.py new file mode 100644 index 00000000..d795fdde --- /dev/null +++ b/modelscope/models/cv/cartoon/facelib/config.py @@ -0,0 +1,23 @@ +import os + +import numpy as np +from easydict import EasyDict as edict + +config = edict() +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +config.DETECT = edict() +config.DETECT.topk = 10 +config.DETECT.thres = 0.8 +config.DETECT.input_shape = (512, 512, 3) +config.KEYPOINTS = edict() +config.KEYPOINTS.p_num = 68 +config.KEYPOINTS.base_extend_range = [0.2, 0.3] +config.KEYPOINTS.input_shape = (160, 160, 3) +config.TRACE = edict() +config.TRACE.pixel_thres = 1 +config.TRACE.smooth_box = 0.3 +config.TRACE.smooth_landmark = 0.95 +config.TRACE.iou_thres = 0.5 +config.DATA = edict() +config.DATA.pixel_means = np.array([123., 116., 103.]) # RGB diff --git a/modelscope/models/cv/cartoon/facelib/face_detector.py b/modelscope/models/cv/cartoon/facelib/face_detector.py new file mode 100644 index 00000000..e5589719 --- /dev/null +++ b/modelscope/models/cv/cartoon/facelib/face_detector.py @@ -0,0 +1,116 @@ +import time + +import cv2 +import numpy as np +import tensorflow as tf + +from .config import config as cfg + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class FaceDetector: + + def __init__(self, dir): + + self.model_path = dir + '/detector.pb' + self.thres = cfg.DETECT.thres + self.input_shape = cfg.DETECT.input_shape + + self._graph = tf.Graph() + + with self._graph.as_default(): + self._graph, self._sess = self.init_model(self.model_path) + + self.input_image = tf.get_default_graph().get_tensor_by_name( + 'tower_0/images:0') + self.training = tf.get_default_graph().get_tensor_by_name( + 'training_flag:0') + self.output_ops = [ + tf.get_default_graph().get_tensor_by_name('tower_0/boxes:0'), + tf.get_default_graph().get_tensor_by_name('tower_0/scores:0'), + tf.get_default_graph().get_tensor_by_name( + 'tower_0/num_detections:0'), + ] + + def __call__(self, image): + + image, scale_x, scale_y = self.preprocess( + image, + target_width=self.input_shape[1], + target_height=self.input_shape[0]) + + image = np.expand_dims(image, 0) + + boxes, scores, num_boxes = self._sess.run( + self.output_ops, + feed_dict={ + self.input_image: image, + self.training: False + }) + + num_boxes = num_boxes[0] + boxes = boxes[0][:num_boxes] + + scores = scores[0][:num_boxes] + + to_keep = scores > self.thres + boxes = boxes[to_keep] + scores = scores[to_keep] + + y1 = self.input_shape[0] / scale_y + x1 = self.input_shape[1] / scale_x + y2 = self.input_shape[0] / scale_y + x2 = self.input_shape[1] / scale_x + scaler = np.array([y1, x1, y2, x2], dtype='float32') + boxes = boxes * scaler + + scores = np.expand_dims(scores, 0).reshape([-1, 1]) + + for i in range(boxes.shape[0]): + boxes[i] = np.array( 
+ [boxes[i][1], boxes[i][0], boxes[i][3], boxes[i][2]]) + return np.concatenate([boxes, scores], axis=1) + + def preprocess(self, image, target_height, target_width, label=None): + + h, w, c = image.shape + + bimage = np.zeros( + shape=[target_height, target_width, c], + dtype=image.dtype) + np.array( + cfg.DATA.pixel_means, dtype=image.dtype) + long_side = max(h, w) + + scale_x = scale_y = target_height / long_side + + image = cv2.resize(image, None, fx=scale_x, fy=scale_y) + + h_, w_, _ = image.shape + bimage[:h_, :w_, :] = image + + return bimage, scale_x, scale_y + + def init_model(self, *args): + pb_path = args[0] + + def init_pb(model_path): + config = tf.ConfigProto() + config.gpu_options.per_process_gpu_memory_fraction = 0.2 + compute_graph = tf.Graph() + compute_graph.as_default() + sess = tf.Session(config=config) + with tf.gfile.GFile(model_path, 'rb') as fid: + graph_def = tf.GraphDef() + graph_def.ParseFromString(fid.read()) + tf.import_graph_def(graph_def, name='') + + return (compute_graph, sess) + + model = init_pb(pb_path) + + graph = model[0] + sess = model[1] + + return graph, sess diff --git a/modelscope/models/cv/cartoon/facelib/face_landmark.py b/modelscope/models/cv/cartoon/facelib/face_landmark.py new file mode 100644 index 00000000..063d40c3 --- /dev/null +++ b/modelscope/models/cv/cartoon/facelib/face_landmark.py @@ -0,0 +1,154 @@ +import cv2 +import numpy as np +import tensorflow as tf + +from .config import config as cfg + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class FaceLandmark: + + def __init__(self, dir): + self.model_path = dir + '/keypoints.pb' + self.min_face = 60 + self.keypoint_num = cfg.KEYPOINTS.p_num * 2 + + self._graph = tf.Graph() + + with self._graph.as_default(): + + self._graph, self._sess = self.init_model(self.model_path) + self.img_input = tf.get_default_graph().get_tensor_by_name( + 'tower_0/images:0') + self.embeddings = tf.get_default_graph().get_tensor_by_name( + 'tower_0/prediction:0') + self.training = tf.get_default_graph().get_tensor_by_name( + 'training_flag:0') + + self.landmark = self.embeddings[:, :self.keypoint_num] + self.headpose = self.embeddings[:, -7:-4] * 90. 
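+        # Layout of the flattened prediction vector, as read off the slices
+        # used here (inferred from this code, not from model documentation):
+        #   [:keypoint_num]  landmark x/y coordinates,
+        #   [-7:-4]          head pose, rescaled by 90 (presumably to degrees),
+        #   [-4:]            state logits, squashed to [0, 1] below.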
+            self.state = tf.nn.sigmoid(self.embeddings[:, -4:])
+
+    def __call__(self, img, bboxes):
+        landmark_result = []
+        state_result = []
+        for i, bbox in enumerate(bboxes):
+            landmark, state = self._one_shot_run(img, bbox, i)
+            if landmark is not None:
+                landmark_result.append(landmark)
+                state_result.append(state)
+        return np.array(landmark_result), np.array(state_result)
+
+    def simple_run(self, cropped_img):
+        with self._graph.as_default():
+
+            cropped_img = np.expand_dims(cropped_img, axis=0)
+            landmark, p, states = self._sess.run(
+                [self.landmark, self.headpose, self.state],
+                feed_dict={
+                    self.img_input: cropped_img,
+                    self.training: False
+                })
+
+        return landmark, states
+
+    def _one_shot_run(self, image, bbox, i):
+
+        bbox_width = bbox[2] - bbox[0]
+        bbox_height = bbox[3] - bbox[1]
+        if (bbox_width <= self.min_face and bbox_height <= self.min_face):
+            return None, None
+        add = int(max(bbox_width, bbox_height))
+        bimg = cv2.copyMakeBorder(
+            image,
+            add,
+            add,
+            add,
+            add,
+            borderType=cv2.BORDER_CONSTANT,
+            value=cfg.DATA.pixel_means)
+        bbox += add
+
+        one_edge = (1 + 2 * cfg.KEYPOINTS.base_extend_range[0]) * bbox_width
+        center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2]
+
+        bbox[0] = center[0] - one_edge // 2
+        bbox[1] = center[1] - one_edge // 2
+        bbox[2] = center[0] + one_edge // 2
+        bbox[3] = center[1] + one_edge // 2
+
+        bbox = bbox.astype(np.int32)
+        crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
+        h, w, _ = crop_image.shape
+        crop_image = cv2.resize(
+            crop_image,
+            (cfg.KEYPOINTS.input_shape[1], cfg.KEYPOINTS.input_shape[0]))
+        crop_image = crop_image.astype(np.float32)
+
+        keypoints, state = self.simple_run(crop_image)
+
+        res = keypoints[0][:self.keypoint_num].reshape((-1, 2))
+        res[:, 0] = res[:, 0] * w / cfg.KEYPOINTS.input_shape[1]
+        res[:, 1] = res[:, 1] * h / cfg.KEYPOINTS.input_shape[0]
+
+        landmark = []
+        for _index in range(res.shape[0]):
+            x_y = res[_index]
+            landmark.append([
+                int(x_y[0] * cfg.KEYPOINTS.input_shape[0] + bbox[0] - add),
+                int(x_y[1] * cfg.KEYPOINTS.input_shape[1] + bbox[1] - add)
+            ])
+
+        landmark = np.array(landmark, np.float32)
+
+        return landmark, state
+
+    def init_model(self, *args):
+
+        if len(args) == 1:
+            use_pb = True
+            pb_path = args[0]
+        else:
+            use_pb = False
+            meta_path = args[0]
+            restore_model_path = args[1]
+
+        def ini_ckpt():
+            graph = tf.Graph()
+            graph.as_default()
+            configProto = tf.ConfigProto()
+            configProto.gpu_options.allow_growth = True
+            sess = tf.Session(config=configProto)
+            saver = tf.train.import_meta_graph(meta_path)
+            saver.restore(sess, restore_model_path)
+
+            print('Model restored!')
+            return (graph, sess)
+
+        def init_pb(model_path):
+            config = tf.ConfigProto()
+            config.gpu_options.per_process_gpu_memory_fraction = 0.2
+            compute_graph = tf.Graph()
+            compute_graph.as_default()
+            sess = tf.Session(config=config)
+            with tf.gfile.GFile(model_path, 'rb') as fid:
+                graph_def = tf.GraphDef()
+                graph_def.ParseFromString(fid.read())
+                tf.import_graph_def(graph_def, name='')
+
+            return (compute_graph, sess)
+
+        if use_pb:
+            model = init_pb(pb_path)
+        else:
+            model = ini_ckpt()
+
+        graph = model[0]
+        sess = model[1]
+
+        return graph, sess
diff --git a/modelscope/models/cv/cartoon/facelib/facer.py b/modelscope/models/cv/cartoon/facelib/facer.py
new file mode 100644
index 00000000..62388ab9
--- /dev/null
+++ b/modelscope/models/cv/cartoon/facelib/facer.py
@@ -0,0 +1,150 @@
+import time
+
+import cv2
+import numpy as np
+
+from .config import config as cfg
+from .face_detector import FaceDetector
+from .face_landmark import FaceLandmark
+from .LK.lk import GroupTrack
+
+
+class FaceAna:
+    '''
+    For speed, only the top cfg.DETECT.topk faces (sorted by area)
+    are detected and tracked.
+    '''
+
+    def __init__(self, model_dir):
+        self.face_detector = FaceDetector(model_dir)
+        self.face_landmark = FaceLandmark(model_dir)
+        self.trace = GroupTrack()
+
+        self.track_box = None
+        self.previous_image = None
+        self.previous_box = None
+
+        self.diff_thres = 5
+        self.top_k = cfg.DETECT.topk
+        self.iou_thres = cfg.TRACE.iou_thres
+        self.alpha = cfg.TRACE.smooth_box
+
+    def run(self, image):
+
+        boxes = self.face_detector(image)
+
+        if boxes.shape[0] > self.top_k:
+            boxes = self.sort(boxes)
+
+        boxes_return = np.array(boxes)
+        landmarks, states = self.face_landmark(image, boxes)
+
+        track = []
+        for i in range(landmarks.shape[0]):
+            track.append([
+                np.min(landmarks[i][:, 0]),
+                np.min(landmarks[i][:, 1]),
+                np.max(landmarks[i][:, 0]),
+                np.max(landmarks[i][:, 1])
+            ])
+        tmp_box = np.array(track)
+
+        self.track_box = self.judge_boxs(boxes_return, tmp_box)
+
+        self.track_box, landmarks = self.sort_res(self.track_box, landmarks)
+        return self.track_box, landmarks, states
+
+    def sort_res(self, bboxes, points):
+        area = []
+        for bbox in bboxes:
+            bbox_width = bbox[2] - bbox[0]
+            bbox_height = bbox[3] - bbox[1]
+            area.append(bbox_height * bbox_width)
+
+        area = np.array(area)
+        picked = area.argsort()[::-1]
+        sorted_bboxes = [bboxes[x] for x in picked]
+        sorted_points = [points[x] for x in picked]
+        return np.array(sorted_bboxes), np.array(sorted_points)
+
+    def diff_frames(self, previous_frame, image):
+        if previous_frame is None:
+            return True
+        else:
+            _diff = cv2.absdiff(previous_frame, image)
+            diff = np.sum(
+                _diff) / previous_frame.shape[0] / previous_frame.shape[1] / 3.
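+            # mean absolute per-pixel difference; frames closer than
+            # diff_thres are treated as near-duplicates for tracking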
+            return diff > self.diff_thres
+
+    def sort(self, bboxes):
+        if self.top_k > 100:
+            return bboxes
+        area = []
+        for bbox in bboxes:
+
+            bbox_width = bbox[2] - bbox[0]
+            bbox_height = bbox[3] - bbox[1]
+            area.append(bbox_height * bbox_width)
+
+        area = np.array(area)
+
+        picked = area.argsort()[-self.top_k:][::-1]
+        sorted_bboxes = [bboxes[x] for x in picked]
+        return np.array(sorted_bboxes)
+
+    def judge_boxs(self, previous_bboxs, now_bboxs):
+
+        def iou(rec1, rec2):
+
+            # area of each rectangle
+            S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
+            S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
+
+            # sum of the two areas
+            sum_area = S_rec1 + S_rec2
+
+            # edges of the intersection rectangle
+            x1 = max(rec1[0], rec2[0])
+            y1 = max(rec1[1], rec2[1])
+            x2 = min(rec1[2], rec2[2])
+            y2 = min(rec1[3], rec2[3])
+
+            # intersection area (zero when the rectangles do not overlap)
+            intersect = max(0, x2 - x1) * max(0, y2 - y1)
+
+            return intersect / (sum_area - intersect)
+
+        if previous_bboxs is None:
+            return now_bboxs
+
+        result = []
+
+        for i in range(now_bboxs.shape[0]):
+            contain = False
+            for j in range(previous_bboxs.shape[0]):
+                if iou(now_bboxs[i], previous_bboxs[j]) > self.iou_thres:
+                    result.append(
+                        self.smooth(now_bboxs[i], previous_bboxs[j]))
+                    contain = True
+                    break
+            if not contain:
+                result.append(now_bboxs[i])
+
+        return np.array(result)
+
+    def smooth(self, now_box, previous_box):
+
+        return self.do_moving_average(now_box[:4], previous_box[:4])
+
+    def do_moving_average(self, p_now, p_previous):
+        p = self.alpha * p_now + (1 - self.alpha) * p_previous
+        return p
+
+    def reset(self):
+        '''
+        Reset the previous info used for tracking.
+        :return:
+        '''
+        self.track_box = None
+        self.previous_image = None
+        self.previous_box = None
diff --git a/modelscope/models/cv/cartoon/mtcnn_pytorch/LICENSE b/modelscope/models/cv/cartoon/mtcnn_pytorch/LICENSE
new file mode 100644
index 00000000..9210f5b8
--- /dev/null
+++ b/modelscope/models/cv/cartoon/mtcnn_pytorch/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Dan Antoshchenko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
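The tracker in `facer.py` above is an IoU-gated exponential moving average: a fresh detection that overlaps its predecessor strongly enough is blended with it, otherwise it replaces it outright. A minimal, self-contained sketch of that update (the `alpha` and `iou_thres` defaults mirror `cfg.TRACE.smooth_box` and `cfg.TRACE.iou_thres`; the helper names are illustrative, not part of the library):

```python
import numpy as np


def iou(a, b):
    # intersection-over-union of two [x1, y1, x2, y2] boxes
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = ((a[2] - a[0]) * (a[3] - a[1])
             + (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union


def smooth_boxes(prev_boxes, new_boxes, alpha=0.3, iou_thres=0.5):
    # keep a fresh detection as-is unless it overlaps a previous box,
    # in which case blend the two with an exponential moving average
    if prev_boxes is None:
        return new_boxes
    out = []
    for box in new_boxes:
        match = next((p for p in prev_boxes if iou(box, p) > iou_thres), None)
        out.append(box if match is None
                   else alpha * box + (1 - alpha) * match)
    return np.array(out)


prev = np.array([[10., 10., 110., 110.]])
new = np.array([[14., 12., 114., 112.]])
print(smooth_boxes(prev, new))  # ~[[11.2, 10.6, 111.2, 110.6]]
```

The landmark smoother in `facelib/LK/lk.py` applies the same moving average per keypoint with `smooth_landmark = 0.95`, keeping the previous point outright when it moved less than the pixel threshold.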
diff --git a/modelscope/models/cv/cartoon/mtcnn_pytorch/README.md b/modelscope/models/cv/cartoon/mtcnn_pytorch/README.md new file mode 100644 index 00000000..b748cf58 --- /dev/null +++ b/modelscope/models/cv/cartoon/mtcnn_pytorch/README.md @@ -0,0 +1,26 @@ +# MTCNN + +`pytorch` implementation of **inference stage** of face detection algorithm described in +[Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878). + +## Example +![example of a face detection](images/example.png) + +## How to use it +Just download the repository and then do this +```python +from src import detect_faces +from PIL import Image + +image = Image.open('image.jpg') +bounding_boxes, landmarks = detect_faces(image) +``` +For examples see `test_on_images.ipynb`. + +## Requirements +* pytorch 0.2 +* Pillow, numpy + +## Credit +This implementation is heavily inspired by: +* [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection) diff --git a/maas_lib/pipelines/nlp/space/__init__.py b/modelscope/models/cv/cartoon/mtcnn_pytorch/__init__.py similarity index 100% rename from maas_lib/pipelines/nlp/space/__init__.py rename to modelscope/models/cv/cartoon/mtcnn_pytorch/__init__.py diff --git a/maas_lib/preprocessors/space/__init__.py b/modelscope/models/cv/cartoon/mtcnn_pytorch/src/__init__.py similarity index 100% rename from maas_lib/preprocessors/space/__init__.py rename to modelscope/models/cv/cartoon/mtcnn_pytorch/src/__init__.py diff --git a/modelscope/models/cv/cartoon/mtcnn_pytorch/src/align_trans.py b/modelscope/models/cv/cartoon/mtcnn_pytorch/src/align_trans.py new file mode 100644 index 00000000..baa3ba73 --- /dev/null +++ b/modelscope/models/cv/cartoon/mtcnn_pytorch/src/align_trans.py @@ -0,0 +1,187 @@ +""" +Created on Mon Apr 24 15:43:29 2017 +@author: zhaoy +""" +import cv2 +import numpy as np + +from .matlab_cp2tform import get_similarity_transform_for_cv2 + +# reference facial points, a list of coordinates (x,y) +dx = 1 +dy = 1 +REFERENCE_FACIAL_POINTS = [ + [30.29459953 + dx, 51.69630051 + dy], # left eye + [65.53179932 + dx, 51.50139999 + dy], # right eye + [48.02519989 + dx, 71.73660278 + dy], # nose + [33.54930115 + dx, 92.3655014 + dy], # left mouth + [62.72990036 + dx, 92.20410156 + dy] # right mouth +] + +DEFAULT_CROP_SIZE = (96, 112) + +global FACIAL_POINTS + + +class FaceWarpException(Exception): + + def __str__(self): + return 'In File {}:{}'.format(__file__, super.__str__(self)) + + +def get_reference_facial_points(output_size=None, + inner_padding_factor=0.0, + outer_padding=(0, 0), + default_square=False): + + tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) + tmp_crop_size = np.array(DEFAULT_CROP_SIZE) + + # 0) make the inner region a square + if default_square: + size_diff = max(tmp_crop_size) - tmp_crop_size + tmp_5pts += size_diff / 2 + tmp_crop_size += size_diff + + h_crop = tmp_crop_size[0] + w_crop = tmp_crop_size[1] + if (output_size): + if (output_size[0] == h_crop and output_size[1] == w_crop): + return tmp_5pts + + if (inner_padding_factor == 0 and outer_padding == (0, 0)): + if output_size is None: + return tmp_5pts + else: + raise FaceWarpException( + 'No paddings to do, output_size must be None or {}'.format( + tmp_crop_size)) + + # check output size + if not (0 <= inner_padding_factor <= 1.0): + raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') + + factor = inner_padding_factor > 0 or outer_padding[0] > 0 + factor = factor or outer_padding[1] > 0 + if (factor and 
output_size is None): + output_size = tmp_crop_size * \ + (1 + inner_padding_factor * 2).astype(np.int32) + output_size += np.array(outer_padding) + + cond1 = outer_padding[0] < output_size[0] + cond2 = outer_padding[1] < output_size[1] + if not (cond1 and cond2): + raise FaceWarpException('Not (outer_padding[0] < output_size[0]' + 'and outer_padding[1] < output_size[1])') + + # 1) pad the inner region according inner_padding_factor + if inner_padding_factor > 0: + size_diff = tmp_crop_size * inner_padding_factor * 2 + tmp_5pts += size_diff / 2 + tmp_crop_size += np.round(size_diff).astype(np.int32) + + # 2) resize the padded inner region + size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2 + + if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[ + 1] * tmp_crop_size[0]: + raise FaceWarpException( + 'Must have (output_size - outer_padding)' + '= some_scale * (crop_size * (1.0 + inner_padding_factor)') + + scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] + tmp_5pts = tmp_5pts * scale_factor + + # 3) add outer_padding to make output_size + reference_5point = tmp_5pts + np.array(outer_padding) + + return reference_5point + + +def get_affine_transform_matrix(src_pts, dst_pts): + + tfm = np.float32([[1, 0, 0], [0, 1, 0]]) + n_pts = src_pts.shape[0] + ones = np.ones((n_pts, 1), src_pts.dtype) + src_pts_ = np.hstack([src_pts, ones]) + dst_pts_ = np.hstack([dst_pts, ones]) + + A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) + + if rank == 3: + tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]], + [A[0, 1], A[1, 1], A[2, 1]]]) + elif rank == 2: + tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]]) + + return tfm + + +def warp_and_crop_face(src_img, + facial_pts, + ratio=0.84, + reference_pts=None, + crop_size=(96, 112), + align_type='similarity' + '', + return_trans_inv=False): + + if reference_pts is None: + if crop_size[0] == 96 and crop_size[1] == 112: + reference_pts = REFERENCE_FACIAL_POINTS + else: + default_square = False + inner_padding_factor = 0 + outer_padding = (0, 0) + output_size = crop_size + + reference_pts = get_reference_facial_points( + output_size, inner_padding_factor, outer_padding, + default_square) + + ref_pts = np.float32(reference_pts) + + factor = ratio + ref_pts = (ref_pts - 112 / 2) * factor + 112 / 2 + ref_pts *= crop_size[0] / 112. 
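+    # the reference points live on a 96x112 template: pull them towards the
+    # template centre by `ratio`, then rescale to the requested crop width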
+ + ref_pts_shp = ref_pts.shape + if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2: + raise FaceWarpException( + 'reference_pts.shape must be (K,2) or (2,K) and K>2') + + if ref_pts_shp[0] == 2: + ref_pts = ref_pts.T + + src_pts = np.float32(facial_pts) + src_pts_shp = src_pts.shape + if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: + raise FaceWarpException( + 'facial_pts.shape must be (K,2) or (2,K) and K>2') + + if src_pts_shp[0] == 2: + src_pts = src_pts.T + + if src_pts.shape != ref_pts.shape: + raise FaceWarpException( + 'facial_pts and reference_pts must have the same shape') + + if align_type == 'cv2_affine': + tfm = cv2.getAffineTransform(src_pts, ref_pts) + tfm_inv = cv2.getAffineTransform(ref_pts, src_pts) + + elif align_type == 'affine': + tfm = get_affine_transform_matrix(src_pts, ref_pts) + tfm_inv = get_affine_transform_matrix(ref_pts, src_pts) + else: + tfm, tfm_inv = get_similarity_transform_for_cv2(src_pts, ref_pts) + + face_img = cv2.warpAffine( + src_img, + tfm, (crop_size[0], crop_size[1]), + borderValue=(255, 255, 255)) + + if return_trans_inv: + return face_img, tfm_inv + else: + return face_img diff --git a/modelscope/models/cv/cartoon/mtcnn_pytorch/src/matlab_cp2tform.py b/modelscope/models/cv/cartoon/mtcnn_pytorch/src/matlab_cp2tform.py new file mode 100644 index 00000000..96a5f965 --- /dev/null +++ b/modelscope/models/cv/cartoon/mtcnn_pytorch/src/matlab_cp2tform.py @@ -0,0 +1,339 @@ +""" +Created on Tue Jul 11 06:54:28 2017 + +@author: zhaoyafei +""" + +import numpy as np +from numpy.linalg import inv, lstsq +from numpy.linalg import matrix_rank as rank +from numpy.linalg import norm + + +class MatlabCp2tormException(Exception): + + def __str__(self): + return 'In File {}:{}'.format(__file__, super.__str__(self)) + + +def tformfwd(trans, uv): + """ + Function: + ---------- + apply affine transform 'trans' to uv + + Parameters: + ---------- + @trans: 3x3 np.array + transform matrix + @uv: Kx2 np.array + each row is a pair of coordinates (x, y) + + Returns: + ---------- + @xy: Kx2 np.array + each row is a pair of transformed coordinates (x, y) + """ + uv = np.hstack((uv, np.ones((uv.shape[0], 1)))) + xy = np.dot(uv, trans) + xy = xy[:, 0:-1] + return xy + + +def tforminv(trans, uv): + """ + Function: + ---------- + apply the inverse of affine transform 'trans' to uv + + Parameters: + ---------- + @trans: 3x3 np.array + transform matrix + @uv: Kx2 np.array + each row is a pair of coordinates (x, y) + + Returns: + ---------- + @xy: Kx2 np.array + each row is a pair of inverse-transformed coordinates (x, y) + """ + Tinv = inv(trans) + xy = tformfwd(Tinv, uv) + return xy + + +def findNonreflectiveSimilarity(uv, xy, options=None): + + options = {'K': 2} + + K = options['K'] + M = xy.shape[0] + x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector + y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector + # print('--->x, y:\n', x, y + + tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) + tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) + X = np.vstack((tmp1, tmp2)) + # print('--->X.shape: ', X.shape + # print('X:\n', X + + u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector + v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector + U = np.vstack((u, v)) + # print('--->U.shape: ', U.shape + # print('U:\n', U + + # We know that X * r = U + if rank(X) >= 2 * K: + r, _, _, _ = lstsq(X, U) + r = np.squeeze(r) + else: + raise Exception('cp2tform:twoUniquePointsReq') + + # print('--->r:\n', r + + sc 
= r[0] + ss = r[1] + tx = r[2] + ty = r[3] + + Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]]) + + # print('--->Tinv:\n', Tinv + + T = inv(Tinv) + # print('--->T:\n', T + + T[:, 2] = np.array([0, 0, 1]) + + return T, Tinv + + +def findSimilarity(uv, xy, options=None): + + options = {'K': 2} + + # uv = np.array(uv) + # xy = np.array(xy) + + # Solve for trans1 + trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) + + # Solve for trans2 + + # manually reflect the xy data across the Y-axis + xyR = xy + xyR[:, 0] = -1 * xyR[:, 0] + + trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options) + + # manually reflect the tform to undo the reflection done on xyR + TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) + + trans2 = np.dot(trans2r, TreflectY) + + # Figure out if trans1 or trans2 is better + xy1 = tformfwd(trans1, uv) + norm1 = norm(xy1 - xy) + + xy2 = tformfwd(trans2, uv) + norm2 = norm(xy2 - xy) + + if norm1 <= norm2: + return trans1, trans1_inv + else: + trans2_inv = inv(trans2) + return trans2, trans2_inv + + +def get_similarity_transform(src_pts, dst_pts, reflective=True): + """ + Function: + ---------- + Find Similarity Transform Matrix 'trans': + u = src_pts[:, 0] + v = src_pts[:, 1] + x = dst_pts[:, 0] + y = dst_pts[:, 1] + [x, y, 1] = [u, v, 1] * trans + + Parameters: + ---------- + @src_pts: Kx2 np.array + source points, each row is a pair of coordinates (x, y) + @dst_pts: Kx2 np.array + destination points, each row is a pair of transformed + coordinates (x, y) + @reflective: True or False + if True: + use reflective similarity transform + else: + use non-reflective similarity transform + + Returns: + ---------- + @trans: 3x3 np.array + transform matrix from uv to xy + trans_inv: 3x3 np.array + inverse of trans, transform matrix from xy to uv + """ + + if reflective: + trans, trans_inv = findSimilarity(src_pts, dst_pts) + else: + trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) + + return trans, trans_inv + + +def cvt_tform_mat_for_cv2(trans): + """ + Function: + ---------- + Convert Transform Matrix 'trans' into 'cv2_trans' which could be + directly used by cv2.warpAffine(): + u = src_pts[:, 0] + v = src_pts[:, 1] + x = dst_pts[:, 0] + y = dst_pts[:, 1] + [x, y].T = cv_trans * [u, v, 1].T + + Parameters: + ---------- + @trans: 3x3 np.array + transform matrix from uv to xy + + Returns: + ---------- + @cv2_trans: 2x3 np.array + transform matrix from src_pts to dst_pts, could be directly used + for cv2.warpAffine() + """ + cv2_trans = trans[:, 0:2].T + + return cv2_trans + + +def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True): + """ + Function: + ---------- + Find Similarity Transform Matrix 'cv2_trans' which could be + directly used by cv2.warpAffine(): + u = src_pts[:, 0] + v = src_pts[:, 1] + x = dst_pts[:, 0] + y = dst_pts[:, 1] + [x, y].T = cv_trans * [u, v, 1].T + + Parameters: + ---------- + @src_pts: Kx2 np.array + source points, each row is a pair of coordinates (x, y) + @dst_pts: Kx2 np.array + destination points, each row is a pair of transformed + coordinates (x, y) + reflective: True or False + if True: + use reflective similarity transform + else: + use non-reflective similarity transform + + Returns: + ---------- + @cv2_trans: 2x3 np.array + transform matrix from src_pts to dst_pts, could be directly used + for cv2.warpAffine() + """ + trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) + cv2_trans = cvt_tform_mat_for_cv2(trans) + cv2_trans_inv = 
cvt_tform_mat_for_cv2(trans_inv)
+
+    return cv2_trans, cv2_trans_inv
+
+
+if __name__ == '__main__':
+    """
+    u = [0, 6, -2]
+    v = [0, 3, 5]
+    x = [-1, 0, 4]
+    y = [-1, -10, 4]
+
+    # In Matlab, run:
+    #
+    #   uv = [u'; v'];
+    #   xy = [x'; y'];
+    #   tform_sim = cp2tform(uv, xy, 'similarity');
+    #
+    #   trans = tform_sim.tdata.T
+    #   ans =
+    #       -0.0764   -1.6190         0
+    #        1.6190   -0.0764         0
+    #       -3.2156    0.0290    1.0000
+    #   trans_inv = tform_sim.tdata.Tinv
+    #   ans =
+    #       -0.0291    0.6163         0
+    #       -0.6163   -0.0291         0
+    #       -0.0756    1.9826    1.0000
+    #   xy_m = tformfwd(tform_sim, u, v)
+    #   xy_m =
+    #       -3.2156    0.0290
+    #        1.1833   -9.9143
+    #        5.0323    2.8853
+    #   uv_m = tforminv(tform_sim, x, y)
+    #   uv_m =
+    #        0.5698    1.3953
+    #        6.0872    2.2733
+    #       -2.6570    4.3314
+    """
+    u = [0, 6, -2]
+    v = [0, 3, 5]
+    x = [-1, 0, 4]
+    y = [-1, -10, 4]
+
+    uv = np.array((u, v)).T
+    xy = np.array((x, y)).T
+
+    print('\n--->uv:')
+    print(uv)
+    print('\n--->xy:')
+    print(xy)
+
+    trans, trans_inv = get_similarity_transform(uv, xy)
+
+    print('\n--->trans matrix:')
+    print(trans)
+
+    print('\n--->trans_inv matrix:')
+    print(trans_inv)
+
+    print('\n---> apply transform to uv')
+    print('\nxy_m = uv_augmented * trans')
+    uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
+    xy_m = np.dot(uv_aug, trans)
+    print(xy_m)
+
+    print('\nxy_m = tformfwd(trans, uv)')
+    xy_m = tformfwd(trans, uv)
+    print(xy_m)
+
+    print('\n---> apply inverse transform to xy')
+    print('\nuv_m = xy_augmented * trans_inv')
+    xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
+    uv_m = np.dot(xy_aug, trans_inv)
+    print(uv_m)
+
+    print('\nuv_m = tformfwd(trans_inv, xy)')
+    uv_m = tformfwd(trans_inv, xy)
+    print(uv_m)
+
+    uv_m = tforminv(trans, xy)
+    print('\nuv_m = tforminv(trans, xy)')
+    print(uv_m)
diff --git a/modelscope/models/cv/cartoon/utils.py b/modelscope/models/cv/cartoon/utils.py
new file mode 100644
index 00000000..39712653
--- /dev/null
+++ b/modelscope/models/cv/cartoon/utils.py
@@ -0,0 +1,91 @@
+import os
+
+import cv2
+import numpy as np
+
+
+def resize_size(image, size=720):
+    h, w, c = np.shape(image)
+    if min(h, w) > size:
+        if h > w:
+            h, w = int(size * h / w), size
+        else:
+            h, w = size, int(size * w / h)
+    image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
+    return image
+
+
+def padTo16x(image):
+    h, w, c = np.shape(image)
+    if h % 16 == 0 and w % 16 == 0:
+        return image, h, w
+    nh, nw = (h // 16 + 1) * 16, (w // 16 + 1) * 16
+    img_new = np.ones((nh, nw, 3), np.uint8) * 255
+    img_new[:h, :w, :] = image
+
+    return img_new, h, w
+
+
+def get_f5p(landmarks, np_img):
+    eye_left = find_pupil(landmarks[36:41], np_img)
+    eye_right = find_pupil(landmarks[42:47], np_img)
+    if eye_left is None or eye_right is None:
+        print('cannot find 5 points with find_pupil, using mean instead!')
+        eye_left = landmarks[36:41].mean(axis=0)
+        eye_right = landmarks[42:47].mean(axis=0)
+    nose = landmarks[30]
+    mouth_left = landmarks[48]
+    mouth_right = landmarks[54]
+    f5p = [[eye_left[0], eye_left[1]], [eye_right[0], eye_right[1]],
+           [nose[0], nose[1]], [mouth_left[0], mouth_left[1]],
+           [mouth_right[0], mouth_right[1]]]
+    return f5p
+
+
+def find_pupil(landmarks, np_img):
+    h, w, _ = np_img.shape
+    xmax = int(landmarks[:, 0].max())
+    xmin = int(landmarks[:, 0].min())
+    ymax = int(landmarks[:, 1].max())
+    ymin = int(landmarks[:, 1].min())
+
+    if ymin >= ymax or xmin >= xmax or ymin < 0 or xmin < 0 or ymax > h or xmax > w:
+        return None
+    eye_img_bgr = np_img[ymin:ymax, xmin:xmax, :]
+    eye_img = cv2.cvtColor(eye_img_bgr, cv2.COLOR_BGR2GRAY)
+    eye_img = cv2.equalizeHist(eye_img)
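+    # mask the equalised patch to the eye contour so the Otsu threshold
+    # below only sees pixels inside the eye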
n_marks = landmarks - np.array([xmin, ymin]).reshape([1, 2]) + eye_mask = cv2.fillConvexPoly( + np.zeros_like(eye_img), n_marks.astype(np.int32), 1) + ret, thresh = cv2.threshold(eye_img, 100, 255, + cv2.THRESH_BINARY | cv2.THRESH_OTSU) + thresh = (1 - thresh / 255.) * eye_mask + cnt = 0 + xm = [] + ym = [] + for i in range(thresh.shape[0]): + for j in range(thresh.shape[1]): + if thresh[i, j] > 0.5: + xm.append(j) + ym.append(i) + cnt += 1 + if cnt != 0: + xm.sort() + ym.sort() + xm = xm[cnt // 2] + ym = ym[cnt // 2] + else: + xm = thresh.shape[1] / 2 + ym = thresh.shape[0] / 2 + + return xm + xmin, ym + ymin + + +def all_file(file_dir): + L = [] + for root, dirs, files in os.walk(file_dir): + for file in files: + extend = os.path.splitext(file)[1] + if extend == '.png' or extend == '.jpg' or extend == '.jpeg': + L.append(os.path.join(root, file)) + return L diff --git a/maas_lib/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py similarity index 52% rename from maas_lib/models/nlp/__init__.py rename to modelscope/models/nlp/__init__.py index 99b56c17..c3baab15 100644 --- a/maas_lib/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -1,3 +1,4 @@ from .sequence_classification_model import * # noqa F403 from .space.dialog_generation_model import * # noqa F403 -from .space.dialog_intent_model import * +from .space.dialog_intent_model import * # noqa F403 +from .text_generation_model import * # noqa F403 diff --git a/maas_lib/models/nlp/sequence_classification_model.py b/modelscope/models/nlp/sequence_classification_model.py similarity index 90% rename from maas_lib/models/nlp/sequence_classification_model.py rename to modelscope/models/nlp/sequence_classification_model.py index d29587a0..6ced7a4e 100644 --- a/maas_lib/models/nlp/sequence_classification_model.py +++ b/modelscope/models/nlp/sequence_classification_model.py @@ -1,17 +1,17 @@ -from typing import Any, Dict, Optional, Union +from typing import Any, Dict import numpy as np -from maas_lib.utils.constant import Tasks +from modelscope.utils.constant import Tasks from ..base import Model from ..builder import MODELS -__all__ = ['SequenceClassificationModel'] +__all__ = ['BertForSequenceClassification'] @MODELS.register_module( Tasks.text_classification, module_name=r'bert-sentiment-analysis') -class SequenceClassificationModel(Model): +class BertForSequenceClassification(Model): def __init__(self, model_dir: str, *args, **kwargs): # Model.__init__(self, model_dir, model_cls, first_sequence, *args, **kwargs) diff --git a/maas_lib/trainers/nlp/space/__init__.py b/modelscope/models/nlp/space/__init__.py similarity index 100% rename from maas_lib/trainers/nlp/space/__init__.py rename to modelscope/models/nlp/space/__init__.py diff --git a/maas_lib/models/nlp/space/dialog_generation_model.py b/modelscope/models/nlp/space/dialog_generation_model.py similarity index 89% rename from maas_lib/models/nlp/space/dialog_generation_model.py rename to modelscope/models/nlp/space/dialog_generation_model.py index be3d7261..db8c40e0 100644 --- a/maas_lib/models/nlp/space/dialog_generation_model.py +++ b/modelscope/models/nlp/space/dialog_generation_model.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional -from maas_lib.trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer -from maas_lib.utils.constant import Tasks +from modelscope.trainers.nlp.space.trainers.gen_trainer import MultiWOZTrainer +from modelscope.utils.constant import Tasks from ...base import Model, Tensor from ...builder import MODELS from 
.model.generator import Generator @@ -68,13 +68,13 @@ class DialogGenerationModel(Model): from numpy import array, float32 import torch - turn_1 = { - 'user': [ - 13, 1045, 2052, 2066, 1037, 10095, 2013, 3002, 2198, 1005, - 1055, 2267, 2000, 10733, 12570, 21713, 4487, 15474, 1012, 7 - ] - } - old_pv_turn_1 = {} + # turn_1 = { + # 'user': [ + # 13, 1045, 2052, 2066, 1037, 10095, 2013, 3002, 2198, 1005, + # 1055, 2267, 2000, 10733, 12570, 21713, 4487, 15474, 1012, 7 + # ] + # } + # old_pv_turn_1 = {} turn_2 = { 'user': diff --git a/maas_lib/models/nlp/space/dialog_intent_model.py b/modelscope/models/nlp/space/dialog_intent_model.py similarity index 94% rename from maas_lib/models/nlp/space/dialog_intent_model.py rename to modelscope/models/nlp/space/dialog_intent_model.py index 747f6a20..eb8b3918 100644 --- a/maas_lib/models/nlp/space/dialog_intent_model.py +++ b/modelscope/models/nlp/space/dialog_intent_model.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional -from maas_lib.trainers.nlp.space.trainers.intent_trainer import IntentTrainer -from maas_lib.utils.constant import Tasks +from modelscope.trainers.nlp.space.trainers.intent_trainer import IntentTrainer +from modelscope.utils.constant import Tasks from ...base import Model, Tensor from ...builder import MODELS from .model.generator import Generator diff --git a/maas_lib/models/nlp/space/model/__init__.py b/modelscope/models/nlp/space/model/__init__.py similarity index 100% rename from maas_lib/models/nlp/space/model/__init__.py rename to modelscope/models/nlp/space/model/__init__.py diff --git a/maas_lib/models/nlp/space/model/gen_unified_transformer.py b/modelscope/models/nlp/space/model/gen_unified_transformer.py similarity index 99% rename from maas_lib/models/nlp/space/model/gen_unified_transformer.py rename to modelscope/models/nlp/space/model/gen_unified_transformer.py index 2ea68bd1..611d627f 100644 --- a/maas_lib/models/nlp/space/model/gen_unified_transformer.py +++ b/modelscope/models/nlp/space/model/gen_unified_transformer.py @@ -3,7 +3,7 @@ IntentUnifiedTransformer """ import torch -from maas_lib.models.nlp.space.model.unified_transformer import \ +from modelscope.models.nlp.space.model.unified_transformer import \ UnifiedTransformer diff --git a/maas_lib/models/nlp/space/model/generator.py b/modelscope/models/nlp/space/model/generator.py similarity index 100% rename from maas_lib/models/nlp/space/model/generator.py rename to modelscope/models/nlp/space/model/generator.py diff --git a/maas_lib/models/nlp/space/model/intent_unified_transformer.py b/modelscope/models/nlp/space/model/intent_unified_transformer.py similarity index 99% rename from maas_lib/models/nlp/space/model/intent_unified_transformer.py rename to modelscope/models/nlp/space/model/intent_unified_transformer.py index dd63df39..e1302c6f 100644 --- a/maas_lib/models/nlp/space/model/intent_unified_transformer.py +++ b/modelscope/models/nlp/space/model/intent_unified_transformer.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from maas_lib.utils.nlp.space.criterions import compute_kl_loss +from modelscope.utils.nlp.space.criterions import compute_kl_loss from .unified_transformer import UnifiedTransformer diff --git a/maas_lib/models/nlp/space/model/model_base.py b/modelscope/models/nlp/space/model/model_base.py similarity index 100% rename from maas_lib/models/nlp/space/model/model_base.py rename to modelscope/models/nlp/space/model/model_base.py diff --git a/maas_lib/models/nlp/space/model/unified_transformer.py 
b/modelscope/models/nlp/space/model/unified_transformer.py similarity index 98% rename from maas_lib/models/nlp/space/model/unified_transformer.py rename to modelscope/models/nlp/space/model/unified_transformer.py index 53e03c69..53a18979 100644 --- a/maas_lib/models/nlp/space/model/unified_transformer.py +++ b/modelscope/models/nlp/space/model/unified_transformer.py @@ -7,9 +7,9 @@ import torch import torch.nn as nn import torch.nn.functional as F -from maas_lib.models.nlp.space.model.model_base import ModelBase -from maas_lib.models.nlp.space.modules.embedder import Embedder -from maas_lib.models.nlp.space.modules.transformer_block import \ +from modelscope.models.nlp.space.model.model_base import ModelBase +from modelscope.models.nlp.space.modules.embedder import Embedder +from modelscope.models.nlp.space.modules.transformer_block import \ TransformerBlock @@ -171,7 +171,7 @@ class UnifiedTransformer(ModelBase): batch_size = mask1.shape[0] seq_len1 = mask1.shape[1] seq_len2 = mask2.shape[1] - seq_len = seq_len1 + seq_len2 + # seq_len = seq_len1 + seq_len2 mask_lu = mask1 mask_ru = torch.ones(batch_size, seq_len1, seq_len2) diff --git a/maas_lib/trainers/nlp/space/metrics/__init__.py b/modelscope/models/nlp/space/modules/__init__.py similarity index 100% rename from maas_lib/trainers/nlp/space/metrics/__init__.py rename to modelscope/models/nlp/space/modules/__init__.py diff --git a/maas_lib/models/nlp/space/modules/embedder.py b/modelscope/models/nlp/space/modules/embedder.py similarity index 100% rename from maas_lib/models/nlp/space/modules/embedder.py rename to modelscope/models/nlp/space/modules/embedder.py diff --git a/maas_lib/models/nlp/space/modules/feedforward.py b/modelscope/models/nlp/space/modules/feedforward.py similarity index 100% rename from maas_lib/models/nlp/space/modules/feedforward.py rename to modelscope/models/nlp/space/modules/feedforward.py diff --git a/maas_lib/models/nlp/space/modules/functions.py b/modelscope/models/nlp/space/modules/functions.py similarity index 100% rename from maas_lib/models/nlp/space/modules/functions.py rename to modelscope/models/nlp/space/modules/functions.py diff --git a/maas_lib/models/nlp/space/modules/multihead_attention.py b/modelscope/models/nlp/space/modules/multihead_attention.py similarity index 100% rename from maas_lib/models/nlp/space/modules/multihead_attention.py rename to modelscope/models/nlp/space/modules/multihead_attention.py diff --git a/maas_lib/models/nlp/space/modules/transformer_block.py b/modelscope/models/nlp/space/modules/transformer_block.py similarity index 92% rename from maas_lib/models/nlp/space/modules/transformer_block.py rename to modelscope/models/nlp/space/modules/transformer_block.py index daa7d723..1a0565d6 100644 --- a/maas_lib/models/nlp/space/modules/transformer_block.py +++ b/modelscope/models/nlp/space/modules/transformer_block.py @@ -5,8 +5,8 @@ TransformerBlock class. 
 import torch
 import torch.nn as nn

-from maas_lib.models.nlp.space.modules.feedforward import FeedForward
-from maas_lib.models.nlp.space.modules.multihead_attention import \
+from modelscope.models.nlp.space.modules.feedforward import FeedForward
+from modelscope.models.nlp.space.modules.multihead_attention import \
     MultiheadAttention
diff --git a/modelscope/models/nlp/text_generation_model.py b/modelscope/models/nlp/text_generation_model.py
new file mode 100644
index 00000000..ebefc8d1
--- /dev/null
+++ b/modelscope/models/nlp/text_generation_model.py
@@ -0,0 +1,52 @@
+from typing import Any, Dict
+
+from modelscope.utils.constant import Tasks
+from ..base import Model, Tensor
+from ..builder import MODELS
+
+__all__ = ['PalmForTextGenerationModel']
+
+
+@MODELS.register_module(Tasks.text_generation, module_name=r'palm')
+class PalmForTextGenerationModel(Model):
+
+    def __init__(self, model_dir: str, *args, **kwargs):
+        """Initialize the text generation model from the `model_dir` path.
+
+        Args:
+            model_dir (str): the model path.
+            tokenizer (optional): may be passed via kwargs to override the
+                tokenizer loaded from `model_dir`.
+        """
+        from sofa import PalmTokenizer
+
+        super().__init__(model_dir, *args, **kwargs)
+        self.model_dir = model_dir
+
+        from sofa.models.palm import PalmForConditionalGeneration, TextGenerator
+        tokenizer = kwargs.pop('tokenizer',
+                               PalmTokenizer.from_pretrained(model_dir))
+        model = PalmForConditionalGeneration.from_pretrained(model_dir)
+        self.generator = TextGenerator(model, tokenizer)
+
+    def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        """return the generation result produced by the model
+
+        Args:
+            input (Dict[str, Tensor]): the preprocessed data
+
+        Returns:
+            Dict[str, Tensor]: the output of the wrapped `TextGenerator`,
+            i.e. the sequences generated from the encoder inputs.
+        """
+
+        encoder_inputs = [
+            input['input_ids'], input['token_type_ids'],
+            input['attention_mask']
+        ]
+        return self.generator(encoder_inputs)
diff --git a/maas_lib/pipelines/__init__.py b/modelscope/pipelines/__init__.py
similarity index 100%
rename from maas_lib/pipelines/__init__.py
rename to modelscope/pipelines/__init__.py
diff --git a/maas_lib/trainers/nlp/space/trainers/__init__.py b/modelscope/pipelines/audio/__init__.py
similarity index 100%
rename from maas_lib/trainers/nlp/space/trainers/__init__.py
rename to modelscope/pipelines/audio/__init__.py
diff --git a/maas_lib/pipelines/base.py b/modelscope/pipelines/base.py
similarity index 56%
rename from maas_lib/pipelines/base.py
rename to modelscope/pipelines/base.py
index c27bc58f..41a80896 100644
--- a/maas_lib/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -2,67 +2,86 @@ import os.path as osp
 from abc import ABC, abstractmethod
-from multiprocessing.sharedctypes import Value
-from typing import Any, Dict, Generator, List, Tuple, Union
+from typing import Any, Dict, Generator, List, Union

-from ali_maas_datasets import PyDataset
 from maas_hub.snapshot_download import snapshot_download

-from maas_lib.models import Model
-from maas_lib.preprocessors import Preprocessor
-from maas_lib.utils.config import Config
-from maas_lib.utils.constant import CONFIGFILE
+from modelscope.models.base import Model
+from modelscope.preprocessors import Preprocessor
+from modelscope.pydatasets import PyDataset
+from modelscope.utils.config import Config
+from
modelscope.utils.hub import get_model_cache_dir
+from modelscope.utils.logger import get_logger
 from .util import is_model_name

 Tensor = Union['torch.Tensor', 'tf.Tensor']
 Input = Union[str, PyDataset, 'PIL.Image.Image', 'numpy.ndarray']
+InputModel = Union[str, Model]

 output_keys = [
 ]  # for each task's pipeline, define standardized output keys that feed postprocess and that also normalize the keys postprocess returns

+logger = get_logger()
+

 class Pipeline(ABC):

+    def initiate_single_model(self, model):
+        logger.info(f'initiate model from {model}')
+        # TODO @wenmeng.zwm replace model.startswith('damo/') with get_model
+        if isinstance(model, str) and model.startswith('damo/'):
+            if not osp.exists(model):
+                cache_path = get_model_cache_dir(model)
+                model = cache_path if osp.exists(
+                    cache_path) else snapshot_download(model)
+            return Model.from_pretrained(model) if is_model_name(
+                model) else model
+        elif isinstance(model, Model):
+            return model
+        else:
+            if model and not isinstance(model, str):
+                raise ValueError(
+                    f'model type for single model is either str or Model, but got type {type(model)}'
+                )
+            return model
+
+    def initiate_multiple_models(self, input_models: List[InputModel]):
+        models = []
+        for model in input_models:
+            models.append(self.initiate_single_model(model))
+        return models
+
     def __init__(self,
                  config_file: str = None,
-                 model: Union[Model, str] = None,
-                 preprocessor: Preprocessor = None,
+                 model: Union[InputModel, List[InputModel]] = None,
+                 preprocessor: Union[Preprocessor, List[Preprocessor]] = None,
                  **kwargs):
         """ Base class for pipeline.

         If config_file is provided, model and preprocessor will be
-        instantiated from corresponding config. Otherwise model
+        instantiated from corresponding config. Otherwise, model
         and preprocessor will be constructed separately.

         Args:
             config_file(str, optional): Filepath to configuration file.
-            model: Model name or model object
-            preprocessor: Preprocessor object
+            model: (list of) Model name or model object
+            preprocessor: (list of) Preprocessor object
         """
         if config_file is not None:
             self.cfg = Config.from_file(config_file)
-
-        if isinstance(model, str):
-            if not osp.exists(model):
-                model = snapshot_download(model)
-
-            if is_model_name(model):
-                self.model = Model.from_pretrained(model)
-            else:
-                self.model = model
-        elif isinstance(model, Model):
-            self.model = model
+        if not isinstance(model, List):
+            self.model = self.initiate_single_model(model)
+            self.models = [self.model]
         else:
-            if model:
-                raise ValueError(
-                    f'model type is either str or Model, but got type {type(model)}'
-                )
+            self.models = self.initiate_multiple_models(model)
+
+        self.has_multiple_models = len(self.models) > 1
         self.preprocessor = preprocessor

     def __call__(self, input: Union[Input, List[Input]], *args,
                  **post_kwargs) -> Union[Dict[str, Any], Generator]:
-        # moodel provider should leave it as it is
-        # maas library developer will handle this function
+        # model provider should leave it as it is
+        # modelscope library developer will handle this function

         # simple showcase, need to support iterator type for both tensorflow and pytorch
         # input_dict = self._handle_input(input)
@@ -91,15 +110,17 @@ class Pipeline(ABC):
     def preprocess(self, inputs: Input) -> Dict[str, Any]:
         """ Provide default implementation based on preprocess_cfg and user can reimplement it
-
         """
         assert self.preprocessor is not None, 'preprocess method should be implemented'
+        assert not isinstance(self.preprocessor, List),\
+            'default implementation does not support using multiple preprocessors.'
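+        # single-preprocessor fast path; pipelines that wrap several models
+        # or preprocessors are expected to override preprocess()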
return self.preprocessor(inputs) def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: """ Provide default implementation using self.model and user can reimplement it """ assert self.model is not None, 'forward method should be implemented' + assert not self.has_multiple_models, 'default implementation does not support multiple models in a pipeline.' return self.model(inputs) @abstractmethod diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py new file mode 100644 index 00000000..6495a5db --- /dev/null +++ b/modelscope/pipelines/builder.py @@ -0,0 +1,171 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os.path as osp +from typing import List, Union + +import json +from maas_hub.file_download import model_file_download + +from modelscope.models.base import Model +from modelscope.utils.config import Config, ConfigDict +from modelscope.utils.constant import CONFIGFILE, Tasks +from modelscope.utils.registry import Registry, build_from_cfg +from .base import Pipeline +from .util import is_model_name + +PIPELINES = Registry('pipelines') + +DEFAULT_MODEL_FOR_PIPELINE = { + # TaskName: (pipeline_module_name, model_repo) + Tasks.image_matting: ('image-matting', 'damo/image-matting-person'), + Tasks.text_classification: + ('bert-sentiment-analysis', 'damo/bert-base-sst2'), + Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'), + Tasks.image_captioning: ('ofa', None), + Tasks.image_generation: + ('person-image-cartoon', + 'damo/cv_unet_person-image-cartoon_compound-models'), +} + + +def build_pipeline(cfg: ConfigDict, + task_name: str = None, + default_args: dict = None): + """ build pipeline given model config dict. + + Args: + cfg (:obj:`ConfigDict`): config dict for model object. + task_name (str, optional): task name, refer to + :obj:`Tasks` for more details. + default_args (dict, optional): Default initialization arguments. + """ + return build_from_cfg( + cfg, PIPELINES, group_key=task_name, default_args=default_args) + + +def pipeline(task: str = None, + model: Union[str, List[str], Model, List[Model]] = None, + preprocessor=None, + config_file: str = None, + pipeline_name: str = None, + framework: str = None, + device: int = -1, + **kwargs) -> Pipeline: + """ Factory method to build a obj:`Pipeline`. + + + Args: + task (str): Task name defining which pipeline will be returned. + model (str or List[str] or obj:`Model` or obj:list[`Model`]): (list of) model name or model object. + preprocessor: preprocessor object. + config_file (str, optional): path to config file. + pipeline_name (str, optional): pipeline class name or alias name. + framework (str, optional): framework type. + device (int, optional): which device is used to do inference. + + Return: + pipeline (obj:`Pipeline`): pipeline object for certain task. 
+
+    Examples:
+    ```python
+    >>> # Using default model for a task
+    >>> p = pipeline('image-classification')
+    >>> # Using pipeline with a model name
+    >>> p = pipeline('text-classification', model='damo/distilbert-base-uncased')
+    >>> # Using pipeline with a model object
+    >>> resnet = Model.from_pretrained('Resnet')
+    >>> p = pipeline('image-classification', model=resnet)
+    >>> # Using pipeline with a list of model names
+    >>> p = pipeline('audio-kws', model=['damo/audio-tts', 'damo/auto-tts2'])
+    ```
+    """
+    if task is None and pipeline_name is None:
+        raise ValueError('task or pipeline_name is required')
+
+    if pipeline_name is None:
+        # get default pipeline for this task
+        if isinstance(model, str) \
+                or (isinstance(model, list) and isinstance(model[0], str)):
+
+            # if is_model_name(model):
+            if (isinstance(model, str) and model.startswith('damo/')) \
+                    or (isinstance(model, list) and model[0].startswith('damo/')) \
+                    or (isinstance(model, str) and osp.exists(model)):
+                # TODO @wenmeng.zwm add support when model is a str of modelhub address
+                # read pipeline info from modelhub configuration file.
+                pipeline_name, default_model_repo = get_default_pipeline_info(
+                    task)
+            else:
+                pipeline_name = get_pipeline_by_model_name(task, model)
+        else:
+            pipeline_name, default_model_repo = get_default_pipeline_info(task)
+
+    if model is None:
+        model = default_model_repo
+
+    assert isinstance(model, (type(None), str, Model, list)), \
+        f'model should be either None, str, List[str], Model, or List[Model], but got {type(model)}'
+
+    cfg = ConfigDict(type=pipeline_name, model=model)
+
+    if kwargs:
+        cfg.update(kwargs)
+
+    if preprocessor is not None:
+        cfg.preprocessor = preprocessor
+
+    return build_pipeline(cfg, task_name=task)
+
+
+def add_default_pipeline_info(task: str,
+                              model_name: str,
+                              modelhub_name: str = None,
+                              overwrite: bool = False):
+    """ Add a default model for a task.
+
+    Args:
+        task (str): task name.
+        model_name (str): model_name.
+        modelhub_name (str): name for default modelhub.
+        overwrite (bool): overwrite default info.
+    """
+    if not overwrite:
+        assert task not in DEFAULT_MODEL_FOR_PIPELINE, \
+            f'task {task} already has default model.'
+
+    DEFAULT_MODEL_FOR_PIPELINE[task] = (model_name, modelhub_name)
+
+
+def get_default_pipeline_info(task):
+    """ Get default info for a certain task.
+
+    Args:
+        task (str): task name.
+
+    Return:
+        A tuple: the first element is the pipeline name (model name), the
+        second element is the default modelhub repo name.
+    """
+
+    if task not in DEFAULT_MODEL_FOR_PIPELINE:
+        # support pipelines which do not register a default model
+        pipeline_name = list(PIPELINES.modules[task].keys())[0]
+        default_model = None
+    else:
+        pipeline_name, default_model = DEFAULT_MODEL_FOR_PIPELINE[task]
+    return pipeline_name, default_model
+
+
+def get_pipeline_by_model_name(task: str, model: Union[str, List[str]]):
+    """ Get the pipeline name by task name and model name.
+
+    Args:
+        task (str): task name.
+        model (str | list[str]): model names
+    """
+    if isinstance(model, str):
+        model_key = model
+    else:
+        model_key = '_'.join(model)
+    assert model_key in PIPELINES.modules[task], \
+        f'pipeline for task {task} model {model_key} not found.'
+ return model_key diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py new file mode 100644 index 00000000..79c85c19 --- /dev/null +++ b/modelscope/pipelines/cv/__init__.py @@ -0,0 +1,2 @@ +from .image_cartoon_pipeline import ImageCartoonPipeline +from .image_matting_pipeline import ImageMattingPipeline diff --git a/modelscope/pipelines/cv/image_cartoon_pipeline.py b/modelscope/pipelines/cv/image_cartoon_pipeline.py new file mode 100644 index 00000000..d253eaf5 --- /dev/null +++ b/modelscope/pipelines/cv/image_cartoon_pipeline.py @@ -0,0 +1,148 @@ +import os +from typing import Any, Dict + +import cv2 +import numpy as np +import PIL +import tensorflow as tf + +from modelscope.models.cv.cartoon.facelib.facer import FaceAna +from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import ( + get_reference_facial_points, warp_and_crop_face) +from modelscope.models.cv.cartoon.utils import get_f5p, padTo16x, resize_size +from modelscope.pipelines.base import Input +from modelscope.preprocessors import load_image +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from ..base import Pipeline +from ..builder import PIPELINES + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + tf.disable_eager_execution() + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.image_generation, module_name='person-image-cartoon') +class ImageCartoonPipeline(Pipeline): + + def __init__(self, model: str): + super().__init__(model=model) + self.facer = FaceAna(self.model) + self.sess_anime_head = self.load_sess( + os.path.join(self.model, 'cartoon_anime_h.pb'), 'model_anime_head') + self.sess_anime_bg = self.load_sess( + os.path.join(self.model, 'cartoon_anime_bg.pb'), 'model_anime_bg') + + self.box_width = 288 + global_mask = cv2.imread(os.path.join(self.model, 'alpha.jpg')) + global_mask = cv2.resize( + global_mask, (self.box_width, self.box_width), + interpolation=cv2.INTER_AREA) + self.global_mask = cv2.cvtColor( + global_mask, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0 + + def load_sess(self, model_path, name): + config = tf.ConfigProto(allow_soft_placement=True) + config.gpu_options.allow_growth = True + sess = tf.Session(config=config) + logger.info(f'loading model from {model_path}') + with tf.gfile.FastGFile(model_path, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + sess.graph.as_default() + tf.import_graph_def(graph_def, name=name) + sess.run(tf.global_variables_initializer()) + logger.info(f'load model {model_path} done.') + return sess + + def preprocess(self, input: Input) -> Dict[str, Any]: + if isinstance(input, str): + img = np.array(load_image(input)) + elif isinstance(input, PIL.Image.Image): + img = np.array(input.convert('RGB')) + elif isinstance(input, np.ndarray): + if len(input.shape) == 2: + input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) + img = input[:, :, ::-1] + else: + raise TypeError(f'input should be either str, PIL.Image,' + f' np.array, but got {type(input)}') + img = img.astype(np.float) + result = {'img': img} + return result + + def detect_face(self, img): + src_h, src_w, _ = img.shape + boxes, landmarks, _ = self.facer.run(img) + if boxes.shape[0] == 0: + return None + else: + return landmarks + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + + img = input['img'].astype(np.uint8) + ori_h, ori_w, _ = img.shape + img = resize_size(img, size=720) + + img_brg = img[:, :, ::-1] + + landmarks = self.detect_face(img) + if landmarks is None: + 
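+            # no face found: return an empty result and let the caller
+            # decide how to handle it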
print('No face detected!') + return {'output_png': None} + + # background process + pad_bg, pad_h, pad_w = padTo16x(img_brg) + + bg_res = self.sess_anime_bg.run( + self.sess_anime_bg.graph.get_tensor_by_name( + 'model_anime_bg/output_image:0'), + feed_dict={'model_anime_bg/input_image:0': pad_bg}) + res = bg_res[:pad_h, :pad_w, :] + + for landmark in landmarks: + # get facial 5 points + f5p = get_f5p(landmark, img_brg) + + # face alignment + head_img, trans_inv = warp_and_crop_face( + img, + f5p, + ratio=0.75, + reference_pts=get_reference_facial_points(default_square=True), + crop_size=(self.box_width, self.box_width), + return_trans_inv=True) + + # head process + head_res = self.sess_anime_head.run( + self.sess_anime_head.graph.get_tensor_by_name( + 'model_anime_head/output_image:0'), + feed_dict={ + 'model_anime_head/input_image:0': head_img[:, :, ::-1] + }) + + # merge head and background + head_trans_inv = cv2.warpAffine( + head_res, + trans_inv, (np.size(img, 1), np.size(img, 0)), + borderValue=(0, 0, 0)) + + mask = self.global_mask + mask_trans_inv = cv2.warpAffine( + mask, + trans_inv, (np.size(img, 1), np.size(img, 0)), + borderValue=(0, 0, 0)) + mask_trans_inv = np.expand_dims(mask_trans_inv, 2) + + res = mask_trans_inv * head_trans_inv + (1 - mask_trans_inv) * res + + res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA) + + return {'output_png': res} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/maas_lib/pipelines/cv/image_matting.py b/modelscope/pipelines/cv/image_matting_pipeline.py similarity index 90% rename from maas_lib/pipelines/cv/image_matting.py rename to modelscope/pipelines/cv/image_matting_pipeline.py index fdb443f9..6f3ff5f5 100644 --- a/maas_lib/pipelines/cv/image_matting.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -4,12 +4,11 @@ from typing import Any, Dict, List, Tuple, Union import cv2 import numpy as np import PIL -from cv2 import COLOR_GRAY2RGB -from maas_lib.pipelines.base import Input -from maas_lib.preprocessors import load_image -from maas_lib.utils.constant import Tasks -from maas_lib.utils.logger import get_logger +from modelscope.pipelines.base import Input +from modelscope.preprocessors import load_image +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger from ..base import Pipeline from ..builder import PIPELINES @@ -18,7 +17,7 @@ logger = get_logger() @PIPELINES.register_module( Tasks.image_matting, module_name=Tasks.image_matting) -class ImageMatting(Pipeline): +class ImageMattingPipeline(Pipeline): def __init__(self, model: str): super().__init__(model=model) diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py new file mode 100644 index 00000000..7d9a2c59 --- /dev/null +++ b/modelscope/pipelines/multi_modal/__init__.py @@ -0,0 +1 @@ +from .image_captioning import ImageCaptionPipeline diff --git a/modelscope/pipelines/multi_modal/image_captioning.py b/modelscope/pipelines/multi_modal/image_captioning.py new file mode 100644 index 00000000..91180e23 --- /dev/null +++ b/modelscope/pipelines/multi_modal/image_captioning.py @@ -0,0 +1,118 @@ +from typing import Any, Dict + +import numpy as np +import torch +from PIL import Image + +from modelscope.pipelines.base import Input +from modelscope.preprocessors import load_image +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from ..base import Pipeline +from ..builder import PIPELINES + 
+logger = get_logger() + + +@PIPELINES.register_module(Tasks.image_captioning, module_name='ofa') +class ImageCaptionPipeline(Pipeline): + # TODO: refine using modelhub + def __init__(self, model: str, bpe_dir: str): + super().__init__() + # turn on cuda if GPU is available + from fairseq import checkpoint_utils, tasks, utils + from ofa.tasks.mm_tasks import CaptionTask + + tasks.register_task('caption', CaptionTask) + use_cuda = False + # use fp16 only when GPU is available + use_fp16 = False + overrides = { + 'bpe_dir': bpe_dir, + 'eval_cider': False, + 'beam': 5, + 'max_len_b': 16, + 'no_repeat_ngram_size': 3, + 'seed': 7 + } + models, cfg, task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(model), arg_overrides=overrides) + + # Move models to GPU + for model in models: + model.eval() + if use_cuda: + model.cuda() + if use_fp16: + model.half() + model.prepare_for_inference_(cfg) + self.models = models + # Initialize generator + self.generator = task.build_generator(models, cfg.generation) + + # Initialize transform + from torchvision import transforms + mean = [0.5, 0.5, 0.5] + std = [0.5, 0.5, 0.5] + + self.patch_resize_transform = transforms.Compose([ + lambda image: image.convert('RGB'), + transforms.Resize( + (cfg.task.patch_image_size, cfg.task.patch_image_size), + interpolation=Image.BICUBIC), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + + self.task = task + self.bos_item = torch.LongTensor([task.src_dict.bos()]) + self.eos_item = torch.LongTensor([task.src_dict.eos()]) + self.pad_idx = task.src_dict.pad() + + def preprocess(self, input: Input) -> Dict[str, Any]: + + def encode_text(text, length=None, append_bos=False, append_eos=False): + s = self.task.tgt_dict.encode_line( + line=self.task.bpe.encode(text), + add_if_not_exist=False, + append_eos=False).long() + if length is not None: + s = s[:length] + if append_bos: + s = torch.cat([self.bos_item, s]) + if append_eos: + s = torch.cat([s, self.eos_item]) + return s + + patch_image = self.patch_resize_transform( + load_image(input)).unsqueeze(0) + patch_mask = torch.tensor([True]) + text = 'what does the image describe?' + src_text = encode_text( + text, append_bos=True, append_eos=True).unsqueeze(0) + src_length = torch.LongTensor( + [s.ne(self.pad_idx).long().sum() for s in src_text]) + sample = { + 'id': np.array(['42']), + 'net_input': { + 'src_tokens': src_text, + 'src_lengths': src_length, + 'patch_images': patch_image, + 'patch_masks': patch_mask, + } + } + return sample + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + from ofa.utils.eval_utils import eval_caption + + results, _ = eval_caption(self.task, self.generator, self.models, + input) + return { + 'image_id': results[0]['image_id'], + 'caption': results[0]['caption'] + } + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + # What should we do here ? 
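+        # nothing left to do: forward() already returns decoded caption
+        # strings via eval_caption (assumption based on OFA's eval utilities)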
+ return inputs diff --git a/maas_lib/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py similarity index 77% rename from maas_lib/pipelines/nlp/__init__.py rename to modelscope/pipelines/nlp/__init__.py index 8a97070b..fe11e9a3 100644 --- a/maas_lib/pipelines/nlp/__init__.py +++ b/modelscope/pipelines/nlp/__init__.py @@ -1,3 +1,4 @@ from .sequence_classification_pipeline import * # noqa F403 from .space.dialog_generation_pipeline import * # noqa F403 from .space.dialog_intent_pipeline import * # noqa F403 +from .text_generation_pipeline import * # noqa F403 diff --git a/maas_lib/pipelines/nlp/sequence_classification_pipeline.py b/modelscope/pipelines/nlp/sequence_classification_pipeline.py similarity index 63% rename from maas_lib/pipelines/nlp/sequence_classification_pipeline.py rename to modelscope/pipelines/nlp/sequence_classification_pipeline.py index f3b20f95..5a14f136 100644 --- a/maas_lib/pipelines/nlp/sequence_classification_pipeline.py +++ b/modelscope/pipelines/nlp/sequence_classification_pipeline.py @@ -1,13 +1,14 @@ import os import uuid -from typing import Any, Dict +from typing import Any, Dict, Union import json import numpy as np -from maas_lib.models.nlp import SequenceClassificationModel -from maas_lib.preprocessors import SequenceClassificationPreprocessor -from maas_lib.utils.constant import Tasks +from modelscope.models.nlp import BertForSequenceClassification +from modelscope.preprocessors import SequenceClassificationPreprocessor +from modelscope.utils.constant import Tasks +from ...models import Model from ..base import Input, Pipeline from ..builder import PIPELINES @@ -18,19 +19,31 @@ __all__ = ['SequenceClassificationPipeline'] Tasks.text_classification, module_name=r'bert-sentiment-analysis') class SequenceClassificationPipeline(Pipeline): - def __init__(self, model: SequenceClassificationModel, - preprocessor: SequenceClassificationPreprocessor, **kwargs): + def __init__(self, + model: Union[BertForSequenceClassification, str], + preprocessor: SequenceClassificationPreprocessor = None, + **kwargs): """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction Args: - model (SequenceClassificationModel): a model instance + model (BertForSequenceClassification): a model instance preprocessor (SequenceClassificationPreprocessor): a preprocessor instance """ - - super().__init__(model=model, preprocessor=preprocessor, **kwargs) + assert isinstance(model, str) or isinstance(model, BertForSequenceClassification), \ + 'model must be a single str or BertForSequenceClassification' + sc_model = model if isinstance( + model, + BertForSequenceClassification) else Model.from_pretrained(model) + if preprocessor is None: + preprocessor = SequenceClassificationPreprocessor( + sc_model.model_dir, + first_sequence='sentence', + second_sequence=None) + super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) from easynlp.utils import io - self.label_path = os.path.join(model.model_dir, 'label_mapping.json') + self.label_path = os.path.join(sc_model.model_dir, + 'label_mapping.json') with io.open(self.label_path) as f: self.label_mapping = json.load(f) self.label_id_to_name = { diff --git a/maas_lib/utils/__init__.py b/modelscope/pipelines/nlp/space/__init__.py similarity index 100% rename from maas_lib/utils/__init__.py rename to modelscope/pipelines/nlp/space/__init__.py diff --git a/maas_lib/pipelines/nlp/space/dialog_generation_pipeline.py b/modelscope/pipelines/nlp/space/dialog_generation_pipeline.py similarity index 
91%
rename from maas_lib/pipelines/nlp/space/dialog_generation_pipeline.py
rename to modelscope/pipelines/nlp/space/dialog_generation_pipeline.py
index a7b2d057..4107c35e 100644
--- a/maas_lib/pipelines/nlp/space/dialog_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/space/dialog_generation_pipeline.py
@@ -1,8 +1,8 @@
 from typing import Any, Dict, Optional
-from maas_lib.models.nlp import DialogGenerationModel
-from maas_lib.preprocessors import DialogGenerationPreprocessor
-from maas_lib.utils.constant import Tasks
+from modelscope.models.nlp import DialogGenerationModel
+from modelscope.preprocessors import DialogGenerationPreprocessor
+from modelscope.utils.constant import Tasks
 from ...base import Model, Tensor
 from ...builder import PIPELINES
diff --git a/maas_lib/pipelines/nlp/space/dialog_intent_pipeline.py b/modelscope/pipelines/nlp/space/dialog_intent_pipeline.py
similarity index 87%
rename from maas_lib/pipelines/nlp/space/dialog_intent_pipeline.py
rename to modelscope/pipelines/nlp/space/dialog_intent_pipeline.py
index 99862311..26ba5553 100644
--- a/maas_lib/pipelines/nlp/space/dialog_intent_pipeline.py
+++ b/modelscope/pipelines/nlp/space/dialog_intent_pipeline.py
@@ -1,8 +1,8 @@
 from typing import Any, Dict, Optional
-from maas_lib.models.nlp import DialogIntentModel
-from maas_lib.preprocessors import DialogIntentPreprocessor
-from maas_lib.utils.constant import Tasks
+from modelscope.models.nlp import DialogIntentModel
+from modelscope.preprocessors import DialogIntentPreprocessor
+from modelscope.utils.constant import Tasks
 from ...base import Input, Pipeline
 from ...builder import PIPELINES
diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py
new file mode 100644
index 00000000..7ad2b67f
--- /dev/null
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -0,0 +1,59 @@
+from typing import Dict, Optional, Union
+
+from modelscope.models import Model
+from modelscope.models.nlp import PalmForTextGenerationModel
+from modelscope.preprocessors import TextGenerationPreprocessor
+from modelscope.utils.constant import Tasks
+from ..base import Pipeline, Tensor
+from ..builder import PIPELINES
+
+__all__ = ['TextGenerationPipeline']
+
+
+@PIPELINES.register_module(Tasks.text_generation, module_name=r'palm')
+class TextGenerationPipeline(Pipeline):
+
+    def __init__(self,
+                 model: Union[PalmForTextGenerationModel, str],
+                 preprocessor: Optional[TextGenerationPreprocessor] = None,
+                 **kwargs):
+        """use `model` and `preprocessor` to create an nlp text generation pipeline for prediction
+
+        Args:
+            model (PalmForTextGenerationModel): a model instance
+            preprocessor (TextGenerationPreprocessor): a preprocessor instance
+        """
+        sc_model = model if isinstance(
+            model,
+            PalmForTextGenerationModel) else Model.from_pretrained(model)
+        if preprocessor is None:
+            preprocessor = TextGenerationPreprocessor(
+                sc_model.model_dir,
+                first_sequence='sentence',
+                second_sequence=None)
+        super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
+        self.tokenizer = preprocessor.tokenizer
+
+    def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
+        """decode the model's predicted token ids into a readable string
+
+        Args:
+            inputs (Dict[str, Tensor]): decoding results, with generated token ids under the 'predictions' key
+
+        Returns:
+            Dict[str, str]: the prediction results
+        """
+
+        vocab_size = len(self.tokenizer.vocab)
+        pred_list = inputs['predictions']
+        pred_ids = pred_list[0][0].cpu().numpy().tolist()
+        for j in range(len(pred_ids)):
+            if pred_ids[j] >= vocab_size:
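+                # generated ids at or beyond the tokenizer vocab (e.g. extra
+                # model-internal tokens) are mapped to a fixed in-vocab
+                # placeholder id; 100 is assumed to be a harmless token here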
+                pred_ids[j] = 100
+        pred = self.tokenizer.convert_ids_to_tokens(pred_ids)
+        pred_string = ''.join(pred).replace(
+            '##',
+            '').split('[SEP]')[0].replace('[CLS]',
+                                          '').replace('[SEP]',
+                                                      '').replace('[UNK]', '')
+        return {'pred_string': pred_string}
diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py
new file mode 100644
index 00000000..caef6b22
--- /dev/null
+++ b/modelscope/pipelines/util.py
@@ -0,0 +1,46 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import os.path as osp
+from typing import List, Union
+
+import json
+from maas_hub.file_download import model_file_download
+
+from modelscope.utils.constant import CONFIGFILE
+
+
+def is_model_name(model: Union[str, List]):
+    """ return whether `model` is a valid modelhub path (or a list of them)
+    """
+
+    def is_model_name_impl(model):
+        if osp.exists(model):
+            if osp.exists(osp.join(model, CONFIGFILE)):
+                return True
+            else:
+                return False
+        else:
+            # try:
+            #     cfg_file = model_file_download(model, CONFIGFILE)
+            # except Exception:
+            #     cfg_file = None
+            # TODO @wenmeng.zwm use exception instead of
+            # following tricky logic
+            cfg_file = model_file_download(model, CONFIGFILE)
+            with open(cfg_file, 'r') as infile:
+                cfg = json.load(infile)
+            if 'Code' in cfg:
+                return False
+            else:
+                return True
+
+    if isinstance(model, str):
+        return is_model_name_impl(model)
+    else:
+        results = [is_model_name_impl(m) for m in model]
+        all_true = all(results)
+        any_true = any(results)
+        if any_true and not all_true:
+            raise ValueError('some models are hub addresses, some are not')
+
+        return all_true
diff --git a/maas_lib/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py
similarity index 89%
rename from maas_lib/preprocessors/__init__.py
rename to modelscope/preprocessors/__init__.py
index 4a146843..5f473753 100644
--- a/maas_lib/preprocessors/__init__.py
+++ b/modelscope/preprocessors/__init__.py
@@ -5,5 +5,6 @@ from .builder import PREPROCESSORS, build_preprocessor
 from .common import Compose
 from .image import LoadImage, load_image
 from .nlp import *  # noqa F403
+from .nlp import TextGenerationPreprocessor
 from .space.dialog_generation_preprocessor import *  # noqa F403
 from .space.dialog_intent_preprocessor import *  # noqa F403
diff --git a/maas_lib/preprocessors/base.py b/modelscope/preprocessors/base.py
similarity index 100%
rename from maas_lib/preprocessors/base.py
rename to modelscope/preprocessors/base.py
diff --git a/maas_lib/preprocessors/builder.py b/modelscope/preprocessors/builder.py
similarity index 80%
rename from maas_lib/preprocessors/builder.py
rename to modelscope/preprocessors/builder.py
index 69421b5f..918f8d17 100644
--- a/maas_lib/preprocessors/builder.py
+++ b/modelscope/preprocessors/builder.py
@@ -1,8 +1,8 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
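+# A minimal sketch of the registry pattern used throughout these modules
+# (semantics of build_from_cfg assumed: the 'type' key selects the class
+# registered under that module_name):
+#
+#   @PREPROCESSORS.register_module(Fields.nlp, module_name='my-preprocessor')
+#   class MyPreprocessor(Preprocessor):
+#       ...
+#
+#   preprocessor = build_preprocessor(dict(type='my-preprocessor'), Fields.nlp)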
-from maas_lib.utils.config import ConfigDict -from maas_lib.utils.constant import Fields -from maas_lib.utils.registry import Registry, build_from_cfg +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import Fields +from modelscope.utils.registry import Registry, build_from_cfg PREPROCESSORS = Registry('preprocessors') diff --git a/maas_lib/preprocessors/common.py b/modelscope/preprocessors/common.py similarity index 100% rename from maas_lib/preprocessors/common.py rename to modelscope/preprocessors/common.py diff --git a/maas_lib/preprocessors/image.py b/modelscope/preprocessors/image.py similarity index 96% rename from maas_lib/preprocessors/image.py rename to modelscope/preprocessors/image.py index 8db9f5bb..142f9484 100644 --- a/maas_lib/preprocessors/image.py +++ b/modelscope/preprocessors/image.py @@ -4,8 +4,8 @@ from typing import Dict, Union from PIL import Image, ImageOps -from maas_lib.fileio import File -from maas_lib.utils.constant import Fields +from modelscope.fileio import File +from modelscope.utils.constant import Fields from .builder import PREPROCESSORS diff --git a/maas_lib/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py similarity index 57% rename from maas_lib/preprocessors/nlp.py rename to modelscope/preprocessors/nlp.py index 0a03328a..c85c2159 100644 --- a/maas_lib/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -5,8 +5,8 @@ from typing import Any, Dict, Union from transformers import AutoTokenizer -from maas_lib.utils.constant import Fields, InputFields -from maas_lib.utils.type_assert import type_assert +from modelscope.utils.constant import Fields, InputFields +from modelscope.utils.type_assert import type_assert from .base import Preprocessor from .builder import PREPROCESSORS @@ -92,3 +92,61 @@ class SequenceClassificationPreprocessor(Preprocessor): rst['token_type_ids'].append(feature['token_type_ids']) return rst + + +@PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm') +class TextGenerationPreprocessor(Preprocessor): + + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data using the vocab.txt from the `model_dir` path + + Args: + model_dir (str): model path + """ + from sofa import PalmTokenizer + + super().__init__(*args, **kwargs) + + self.model_dir: str = model_dir + self.first_sequence: str = kwargs.pop('first_sequence', + 'first_sequence') + self.second_sequence: str = kwargs.pop('second_sequence', + 'second_sequence') + self.sequence_length: int = kwargs.pop('sequence_length', 128) + self.tokenizer = PalmTokenizer.from_pretrained(model_dir) + + @type_assert(object, str) + def __call__(self, data: str) -> Dict[str, Any]: + """process the raw input data + + Args: + data (str): a sentence + Example: + 'you are so handsome.' 
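+
+            Note: each value in the returned dict is a torch tensor of shape
+            (1, sequence_length).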
+ + Returns: + Dict[str, Any]: the preprocessed data + """ + import torch + + new_data = {self.first_sequence: data} + # preprocess the data for the model input + + rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []} + + max_seq_length = self.sequence_length + + text_a = new_data.get(self.first_sequence, None) + text_b = new_data.get(self.second_sequence, None) + feature = self.tokenizer( + text_a, + text_b, + padding='max_length', + truncation=True, + max_length=max_seq_length) + + rst['input_ids'].append(feature['input_ids']) + rst['attention_mask'].append(feature['attention_mask']) + rst['token_type_ids'].append(feature['token_type_ids']) + + return {k: torch.tensor(v) for k, v in rst.items()} diff --git a/maas_lib/utils/nlp/__init__.py b/modelscope/preprocessors/space/__init__.py similarity index 100% rename from maas_lib/utils/nlp/__init__.py rename to modelscope/preprocessors/space/__init__.py diff --git a/maas_lib/preprocessors/space/dialog_generation_preprocessor.py b/modelscope/preprocessors/space/dialog_generation_preprocessor.py similarity index 83% rename from maas_lib/preprocessors/space/dialog_generation_preprocessor.py rename to modelscope/preprocessors/space/dialog_generation_preprocessor.py index 5b127e8e..c6e2584d 100644 --- a/maas_lib/preprocessors/space/dialog_generation_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_generation_preprocessor.py @@ -4,10 +4,11 @@ import os import uuid from typing import Any, Dict, Union -from maas_lib.data.nlp.space.fields.gen_field import MultiWOZBPETextField -from maas_lib.utils.config import Config -from maas_lib.utils.constant import Fields, InputFields -from maas_lib.utils.type_assert import type_assert +from modelscope.preprocessors.space.fields.gen_field import \ + MultiWOZBPETextField +from modelscope.utils.config import Config +from modelscope.utils.constant import Fields, InputFields +from modelscope.utils.type_assert import type_assert from ..base import Preprocessor from ..builder import PREPROCESSORS diff --git a/maas_lib/preprocessors/space/dialog_intent_preprocessor.py b/modelscope/preprocessors/space/dialog_intent_preprocessor.py similarity index 84% rename from maas_lib/preprocessors/space/dialog_intent_preprocessor.py rename to modelscope/preprocessors/space/dialog_intent_preprocessor.py index 8dba5075..f26fa9a5 100644 --- a/maas_lib/preprocessors/space/dialog_intent_preprocessor.py +++ b/modelscope/preprocessors/space/dialog_intent_preprocessor.py @@ -4,10 +4,11 @@ import os import uuid from typing import Any, Dict, Union -from maas_lib.data.nlp.space.fields.intent_field import IntentBPETextField -from maas_lib.utils.config import Config -from maas_lib.utils.constant import Fields, InputFields -from maas_lib.utils.type_assert import type_assert +from modelscope.preprocessors.space.fields.intent_field import \ + IntentBPETextField +from modelscope.utils.config import Config +from modelscope.utils.constant import Fields, InputFields +from modelscope.utils.type_assert import type_assert from ..base import Preprocessor from ..builder import PREPROCESSORS diff --git a/maas_lib/utils/nlp/space/__init__.py b/modelscope/preprocessors/space/fields/__init__.py similarity index 100% rename from maas_lib/utils/nlp/space/__init__.py rename to modelscope/preprocessors/space/fields/__init__.py diff --git a/modelscope/preprocessors/space/fields/gen_field.py b/modelscope/preprocessors/space/fields/gen_field.py new file mode 100644 index 00000000..91ec1cf8 --- /dev/null +++ 
b/modelscope/preprocessors/space/fields/gen_field.py
@@ -0,0 +1,688 @@
+"""
+Field class
+"""
+import os
+import random
+from collections import OrderedDict
+from itertools import chain
+
+import numpy as np
+
+from modelscope.preprocessors.space.tokenizer import Tokenizer
+from modelscope.utils.nlp.space import ontology, utils
+from modelscope.utils.nlp.space.db_ops import MultiWozDB
+from modelscope.utils.nlp.space.utils import list2np
+
+
+class BPETextField(object):
+
+    pad_token = '[PAD]'
+    bos_token = '[BOS]'
+    eos_token = '[EOS]'
+    unk_token = '[UNK]'
+    sos_u_token = '<sos_u>'
+    eos_u_token = '<eos_u>'
+    sos_b_token = '<sos_b>'
+    eos_b_token = '<eos_b>'
+    sos_d_token = '<sos_d>'
+    eos_d_token = '<eos_d>'
+    sos_a_token = '<sos_a>'
+    eos_a_token = '<eos_a>'
+    sos_db_token = '<sos_db>'
+    eos_db_token = '<eos_db>'
+    sos_r_token = '<sos_r>'
+    eos_r_token = '<eos_r>'
+
+    @property
+    def bot_id(self):
+        """
+        Distinguishes between the two roles, user and bot.
+        0 and 1 are not vocabulary indices but dedicated role indices of size
+        2, matching the hyperparameter 'num_type_embeddings'.
+        """
+        return 0
+
+    @property
+    def user_id(self):
+        """
+        Distinguishes between the two roles, user and bot.
+        0 and 1 are not vocabulary indices but dedicated role indices of size
+        2, matching the hyperparameter 'num_type_embeddings'.
+        """
+        return 1
+
+    @property
+    def vocab_size(self):
+        return self.tokenizer.vocab_size
+
+    @property
+    def num_specials(self):
+        return len(self.tokenizer.special_tokens)
+
+    @property
+    def pad_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.pad_token])[0]
+
+    @property
+    def bos_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.bos_token])[0]
+
+    @property
+    def eos_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_token])[0]
+
+    @property
+    def unk_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.unk_token])[0]
+
+    @property
+    def sos_u_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_u_token])[0]
+
+    @property
+    def eos_u_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_u_token])[0]
+
+    @property
+    def sos_b_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_b_token])[0]
+
+    @property
+    def eos_b_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_b_token])[0]
+
+    @property
+    def sos_db_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_db_token])[0]
+
+    @property
+    def eos_db_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_db_token])[0]
+
+    @property
+    def sos_a_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_a_token])[0]
+
+    @property
+    def eos_a_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_a_token])[0]
+
+    @property
+    def sos_r_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_r_token])[0]
+
+    @property
+    def eos_r_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_r_token])[0]
+
+    @property
+    def sos_d_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_d_token])[0]
+
+    @property
+    def eos_d_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_d_token])[0]
+
+    def __init__(self, config):
+        self.gpu = 0
+        self.tokenizer = None
+        self.vocab = None
+        self.db = None
+        self.set_stats = {}
+
+        self.prompt_num_for_understand = config.BPETextField.prompt_num_for_understand
+        self.prompt_num_for_policy = config.BPETextField.prompt_num_for_policy
+        self.understand_tokens = ontology.get_understand_tokens(
+            self.prompt_num_for_understand)
+        self.policy_tokens = ontology.get_policy_tokens(
+            self.prompt_num_for_policy)
+
+        self.with_query_bow = config.BPETextField.with_query_bow
+        self.understand = config.BPETextField.understand
+        self.policy = config.BPETextField.policy
+
+        self.batch_size = config.Trainer.batch_size
+        self.filtered = config.BPETextField.filtered
+        self.max_len = config.BPETextField.max_len
+        self.min_utt_len = config.BPETextField.min_utt_len
+        self.max_utt_len = config.BPETextField.max_utt_len
+        self.min_ctx_turn = config.BPETextField.min_ctx_turn
+        self.max_ctx_turn = config.BPETextField.max_ctx_turn - 1  # subtract reply turn
+
+        self.use_true_prev_bspn = config.Generator.use_true_prev_bspn
+        self.use_true_prev_aspn = config.Generator.use_true_prev_aspn
+        self.use_true_db_pointer = config.Generator.use_true_db_pointer
+        self.use_true_prev_resp = config.Generator.use_true_prev_resp
+        self.use_true_curr_bspn = config.Generator.use_true_curr_bspn
+        self.use_true_curr_aspn = config.Generator.use_true_curr_aspn
+        self.use_all_previous_context = config.Generator.use_all_previous_context
+        self.use_true_bspn_for_ctr_eval = config.Generator.use_true_bspn_for_ctr_eval
+        self.use_true_domain_for_ctr_eval = config.Generator.use_true_domain_for_ctr_eval
+
+    def collate_fn_multi_turn(self, samples):
+        batch_size = len(samples)
+        batch = {}
+
+        src = [sp['src'][-self.max_ctx_turn:] for sp in samples]
+        query_token, src_token, src_pos, src_turn, src_role = [], [], [], [], []
+        for utts in src:
+            query_token.append(utts[-1])
+            utt_lens = [len(utt) for utt in utts]
+
+            # Token ids
+            src_token.append(list(chain(*utts))[-self.max_len:])
+
+            # Position ids
+            pos = [list(range(l)) for l in utt_lens]
+            src_pos.append(list(chain(*pos))[-self.max_len:])
+
+            # Turn ids
+            turn = [[len(utts) - i] * l for i, l in enumerate(utt_lens)]
+            src_turn.append(list(chain(*turn))[-self.max_len:])
+
+            # Role ids
+            role = [
+                [self.bot_id if (len(utts) - i) % 2 == 0 else self.user_id] * l
+                for i, l in enumerate(utt_lens)
+            ]
+            src_role.append(list(chain(*role))[-self.max_len:])
+
+        # src and tgt sequences are padded separately so that the first decoded
+        # token stays aligned across the batch
+        src_token = list2np(src_token, padding=self.pad_id)
+        src_pos = list2np(src_pos, padding=self.pad_id)
+        src_turn = list2np(src_turn, padding=self.pad_id)
+        src_role = list2np(src_role, padding=self.pad_id)
+        batch['src_token'] = src_token
+        batch['src_pos'] = src_pos
+        batch['src_type'] = src_role
+        batch['src_turn'] = src_turn
+        batch['src_mask'] = (src_token != self.pad_id).astype('int64')
+
+        if self.with_query_bow:
+            query_token = list2np(query_token, padding=self.pad_id)
+            batch['query_token'] = query_token
+            batch['query_mask'] = (query_token != self.pad_id).astype('int64')
+
+        if self.understand_ids and self.understand:
+            understand = [self.understand_ids for _ in samples]
+            understand_token = np.array(understand).astype('int64')
+            batch['understand_token'] = understand_token
+            batch['understand_mask'] = (understand_token !=
+                                        self.pad_id).astype('int64')
+
+        if self.policy_ids and self.policy:
+            policy = [self.policy_ids for _ in samples]
+            policy_token = np.array(policy).astype('int64')
+            batch['policy_token'] = policy_token
+            batch['policy_mask'] = (policy_token !=
+                                    self.pad_id).astype('int64')
+
+        if 'tgt' in samples[0]:
+            tgt = [sp['tgt'] for sp in samples]
+
+            # Token ids & Label ids
+            tgt_token = list2np(tgt, padding=self.pad_id)
+
+            # Position ids
+            tgt_pos = np.zeros_like(tgt_token)
+            tgt_pos[:] = np.arange(tgt_token.shape[1], dtype=tgt_token.dtype)
+
+            # Turn ids
+            tgt_turn = np.zeros_like(tgt_token)
+
+            # Role ids
+            tgt_role = np.full_like(tgt_token, self.bot_id)
+
+            batch['tgt_token'] = tgt_token
+            batch['tgt_pos'] = tgt_pos
+            batch['tgt_type'] = tgt_role
+            batch['tgt_turn'] = tgt_turn
+            batch['tgt_mask'] = (tgt_token != self.pad_id).astype('int64')
+
+        return
batch, batch_size + + def _bucket_by_turn(self, encoded_data): + turn_bucket = {} + for dial in encoded_data: + turn_len = len(dial) + if turn_len not in turn_bucket: + turn_bucket[turn_len] = [] + turn_bucket[turn_len].append(dial) + return OrderedDict(sorted(turn_bucket.items(), key=lambda i: i[0])) + + def _construct_mini_batch(self, data): + all_batches = [] + batch = [] + for dial in data: + batch.append(dial) + if len(batch) == self.batch_size: + # print('batch size: %d, batch num +1'%(len(batch))) + all_batches.append(batch) + batch = [] + # if remainder > 1/2 batch_size, just put them in the previous batch, otherwise form a new batch + # print('last batch size: %d, batch num +1'%(len(batch))) + # if (len(batch) % len(cfg.cuda_device)) != 0: + # batch = batch[:-(len(batch) % len(cfg.cuda_device))] + # TODO deal with deleted data + if self.gpu <= 1: + if len(batch) > 0.5 * self.batch_size: + all_batches.append(batch) + elif len(all_batches): + all_batches[-1].extend(batch) + else: + all_batches.append(batch) + + return all_batches + + def transpose_batch(self, batch): + dial_batch = [] + turn_num = len(batch[0]) + for turn in range(turn_num): + turn_l = {} + for dial in batch: + this_turn = dial[turn] + for k in this_turn: + if k not in turn_l: + turn_l[k] = [] + turn_l[k].append(this_turn[k]) + dial_batch.append(turn_l) + return dial_batch + + def get_eval_data(self, set_name='dev'): + name_to_set = {'train': self.train, 'test': self.test, 'dev': self.dev} + dial = name_to_set[set_name] + + if set_name not in self.set_stats: + self.set_stats[set_name] = {} + num_turns = 0 + num_dials = len(dial) + for d in dial: + num_turns += len(d) + + self.set_stats[set_name]['num_turns'] = num_turns + self.set_stats[set_name]['num_dials'] = num_dials + + return dial + + def get_nontranspose_data_iterator(self, all_batches): + for i, batch in enumerate(all_batches): + yield batch + + def get_data_iterator(self, all_batches): + for i, batch in enumerate(all_batches): + yield self.transpose_batch(batch) + + +class MultiWOZBPETextField(BPETextField): + + def __init__(self, model_dir, config): + super(MultiWOZBPETextField, self).__init__(config) + import spacy + self.nlp = spacy.load('en_core_web_sm') + + self.db = MultiWozDB( + model_dir, { + 'attraction': 'db/attraction_db_processed.json', + 'hospital': 'db/hospital_db_processed.json', + 'hotel': 'db/hotel_db_processed.json', + 'police': 'db/police_db_processed.json', + 'restaurant': 'db/restaurant_db_processed.json', + 'taxi': 'db/taxi_db_processed.json', + 'train': 'db/train_db_processed.json', + }) + self._build_vocab(model_dir) + + special_tokens = [ + self.pad_token, self.bos_token, self.eos_token, self.unk_token + ] + special_tokens.extend(self.add_sepcial_tokens()) + self.tokenizer = Tokenizer( + vocab_path=os.path.join(model_dir, 'vocab.txt'), + special_tokens=special_tokens, + tokenizer_type=config.BPETextField.tokenizer_type) + self.understand_ids = self.tokenizer.convert_tokens_to_ids( + self.understand_tokens) + self.policy_ids = self.tokenizer.convert_tokens_to_ids( + self.policy_tokens) + + return + + def get_ids(self, data: str): + result = [self.sos_u_id] + self.tokenizer.convert_tokens_to_ids( + self.tokenizer.tokenize( + self._get_convert_str(data))) + [self.eos_u_id] + return result + + def inverse_transpose_turn(self, turn_list): + """ + eval, one dialog at a time + """ + dialogs = {} + turn_num = len(turn_list) + dial_id = turn_list[0]['dial_id'] + dialogs[dial_id] = [] + for turn_idx in range(turn_num): + dial_turn = {} + turn 
= turn_list[turn_idx]
+            for key, value in turn.items():
+                if key == 'dial_id':
+                    continue
+                if key == 'pointer' and self.db is not None:
+                    turn_domain = turn['turn_domain'][-1]
+                    value = self.db.pointerBack(value, turn_domain)
+                dial_turn[key] = value
+            dialogs[dial_id].append(dial_turn)
+        return dialogs
+
+    def inverse_transpose_batch(self, turn_batch_list):
+        """
+        :param turn_batch_list: list of transposed dial batches
+        """
+        dialogs = {}
+        total_turn_num = len(turn_batch_list)
+        # initialize
+        for idx_in_batch, dial_id in enumerate(turn_batch_list[0]['dial_id']):
+            dialogs[dial_id] = []
+            for turn_n in range(total_turn_num):
+                dial_turn = {}
+                turn_batch = turn_batch_list[turn_n]
+                for key, v_list in turn_batch.items():
+                    if key == 'dial_id':
+                        continue
+                    value = v_list[idx_in_batch]
+                    if key == 'pointer' and self.db is not None:
+                        turn_domain = turn_batch['turn_domain'][idx_in_batch][
+                            -1]
+                        value = self.db.pointerBack(value, turn_domain)
+                    dial_turn[key] = value
+                dialogs[dial_id].append(dial_turn)
+        return dialogs
+
+    def get_batches(self, set_name):
+        """
+        compute dataset stats.
+        """
+        global dia_count
+        log_str = ''
+        name_to_set = {'train': self.train, 'test': self.test, 'dev': self.dev}
+        dial = name_to_set[set_name]
+        turn_bucket = self._bucket_by_turn(dial)
+        # self._shuffle_turn_bucket(turn_bucket)
+        all_batches = []
+
+        if set_name not in self.set_stats:
+            self.set_stats[set_name] = {}
+        num_training_steps = 0
+        num_turns = 0
+        num_dials = 0
+
+        for k in turn_bucket:
+            if set_name != 'test' and k == 1 or k >= 17:
+                continue
+            batches = self._construct_mini_batch(turn_bucket[k])
+            try:
+                log_str += 'turn num:%d, dial num: %d, batch num: %d last batch len: %d\n' % (
+                    k, len(turn_bucket[k]), len(batches), len(batches[-1]))
+            except IndexError:
+                log_str += 'turn num:%d, dial num: %d, batch num: %d last batch len: %d\n' % (
+                    k, len(turn_bucket[k]), len(batches), 0.0)
+            # print("turn num:%d, dial num:v%d, batch num: %d, "%(k, len(turn_bucket[k]), len(batches)))
+            num_training_steps += k * len(batches)
+            num_turns += k * len(turn_bucket[k])
+            num_dials += len(turn_bucket[k])
+            all_batches += batches
+        log_str += 'total batch num: %d\n' % len(all_batches)
+        # print('total batch num: %d'%len(all_batches))
+        # print('dialog count: %d'%dia_count)
+        # return all_batches
+
+        # log stats
+        # logging.info(log_str)
+        # cfg.num_training_steps = num_training_steps * cfg.epoch_num
+        self.set_stats[set_name][
+            'num_training_steps_per_epoch'] = num_training_steps  # turn-level steps
+        self.set_stats[set_name]['num_turns'] = num_turns
+        self.set_stats[set_name]['num_dials'] = num_dials
+
+        if set_name == 'train':
+            random.shuffle(all_batches)
+        return all_batches
+
+    def add_sepcial_tokens(self):
+        """
+        add special tokens to the GPT tokenizer;
+        serves a similar role to Vocab.construct(),
+        building a list of special tokens
+        """
+        special_tokens = []
+        prompt_tokens = self.understand_tokens + self.policy_tokens
+        special_tokens.extend(
+            ontology.get_special_tokens(other_tokens=prompt_tokens))
+
+        for word in ontology.all_domains + ['general']:
+            word = '[' + word + ']'
+            special_tokens.append(word)
+        for word in ontology.all_acts:
+            word = '[' + word + ']'
+            special_tokens.append(word)
+        for word in self.vocab._word2idx.keys():
+            if word.startswith('[value_') and word.endswith(']'):
+                special_tokens.append(word)
+
+        return special_tokens
+
+    def _build_vocab(self, model_dir: str):
+        self.vocab = utils.MultiWOZVocab(3000)
+        vp = os.path.join('{}/vocab'.format(model_dir))
+        self.vocab.load_vocab(vp)
+        return self.vocab.vocab_size
+
+    def _get_convert_str(self, sent):
+        assert isinstance(sent, str)
+        return ' '.join([
+            self.tokenizer.spec_convert_dict.get(tok, tok)
+            for tok in sent.split()
+        ])
+
+    def bspan_to_DBpointer(self, bspan, turn_domain):
+        constraint_dict = self.bspan_to_constraint_dict(bspan)
+        # print(constraint_dict)
+        matnums = self.db.get_match_num(constraint_dict)
+        match_dom = turn_domain[0] if len(turn_domain) == 1 else turn_domain[1]
+        match_dom = match_dom[1:-1] if match_dom.startswith('[') else match_dom
+        match = matnums[match_dom]
+        # vector = self.db.addDBPointer(match_dom, match)
+        vector = self.db.addDBIndicator(match_dom, match)
+        return vector
+
+    def bspan_to_constraint_dict(self, bspan, bspn_mode='bspn'):
+        """
+        ['[hotel]', 'pricerange', 'cheap', 'type', 'hotel'] -> {'hotel': {'pricerange': 'cheap', 'type': 'hotel'}}
+        """
+        bspan = bspan.split() if isinstance(bspan, str) else bspan
+        constraint_dict = {}
+        domain = None
+        conslen = len(bspan)
+        for idx, cons in enumerate(bspan):
+            cons = self.vocab.decode(cons) if type(cons) is not str else cons
+            if cons == '<eos_b>':
+                break
+            if '[' in cons:
+                if cons[1:-1] not in ontology.all_domains:
+                    continue
+                domain = cons[1:-1]
+            elif cons in ontology.get_slot:
+                if domain is None:
+                    continue
+                if cons == 'people':
+                    # handle confusion of value name "people's portraits..." and slot people
+                    try:
+                        ns = bspan[idx + 1]
+                        ns = self.vocab.decode(ns) if type(
+                            ns) is not str else ns
+                        if ns == "'s":
+                            continue
+                    except IndexError:
+                        continue
+                if not constraint_dict.get(domain):
+                    constraint_dict[domain] = {}
+                if bspn_mode == 'bsdx':
+                    constraint_dict[domain][cons] = 1
+                    continue
+                vidx = idx + 1
+                if vidx == conslen:
+                    break
+                vt_collect = []
+                vt = bspan[vidx]
+                vt = self.vocab.decode(vt) if type(vt) is not str else vt
+                while vidx < conslen and vt != '<eos_b>' and '[' not in vt and vt not in ontology.get_slot:
+                    vt_collect.append(vt)
+                    vidx += 1
+                    if vidx == conslen:
+                        break
+                    vt = bspan[vidx]
+                    vt = self.vocab.decode(vt) if type(vt) is not str else vt
+                if vt_collect:
+                    constraint_dict[domain][cons] = ' '.join(vt_collect)
+
+        return constraint_dict
+
+    def convert_batch_turn(self, turn_batch, pv_batch, first_turn=False):
+        """
+        URURU here means turn-level training (data arrangement), as opposed to
+        session-level training (convert_batch_session). This differs from its
+        meaning at eval time, where both are generated turn by turn; see the
+        comments on the related eval functions for the eval-time meaning.
+
+        convert the current and the last turn
+        concat [U_0,R_0,...,U_{t-1}, R_{t-1}, U_t, B_t, A_t, R_t]
+        first turn: [U_t, B_t, A_t, R_t]
+        try: [user, bspn, db, aspn, resp]
+        """
+        inputs = []
+        if first_turn:
+            batch_zipped = zip(turn_batch['user'], turn_batch['bspn'],
+                               turn_batch['db'], turn_batch['aspn'],
+                               turn_batch['resp'])
+            for u, b, db, a, r in batch_zipped:
+                if self.use_true_curr_bspn:
+                    src = [u + b + db]
+                    tgt = a + r
+                else:
+                    src = [u]
+                    tgt = b + db + a + r
+                inputs.append({'src': src, 'tgt': tgt})
+                pv = [src[-1], tgt]
+                pv_batch.append(pv)
+        else:
+            batch_zipped = zip(pv_batch, turn_batch['user'],
+                               turn_batch['bspn'], turn_batch['db'],
+                               turn_batch['aspn'], turn_batch['resp'])
+            for i, (pv, u, b, db, a, r) in enumerate(batch_zipped):
+                if self.use_true_curr_bspn:
+                    src = pv + [u + b + db]
+                    tgt = a + r
+                else:
+                    src = pv + [u]
+                    tgt = b + db + a + r
+                inputs.append({'src': src, 'tgt': tgt})
+                pv = [src[-1], tgt]
+                pv_batch[i].extend(pv)
+
+        return inputs, pv_batch
+
+    def wrap_result_lm(self, result_dict, eos_syntax=None):
+        results = []
+        eos_syntax = ontology.eos_tokens if not eos_syntax else eos_syntax
+        sos_syntax = ontology.sos_tokens
+        # ground truth bs, as, ds.. generate response
+        field = [
+            'dial_id', 'turn_num', 'user', 'bspn_gen', 'bsdx', 'resp_gen',
+            'resp', 'aspn_gen', 'aspn', 'dspn_gen', 'dspn', 'bspn', 'pointer',
+            'qspn_gen', 'qspn'
+        ]
+
+        for dial_id, turns in result_dict.items():
+            entry = {'dial_id': dial_id, 'turn_num': len(turns)}
+            for f in field[2:]:
+                entry[f] = ''  # TODO ???
+            results.append(entry)
+            for turn_idx, turn in enumerate(turns):
+                entry = {'dial_id': dial_id}
+                for key in field:
+                    if key in ['dial_id']:
+                        continue
+                    v = turn.get(key, '')
+                    if key == 'turn_domain':
+                        v = ' '.join(v)
+
+                    if key in eos_syntax and v != '':
+                        # remove eos tokens
+                        v = self.tokenizer.decode(v)
+                        v = v.split()
+                        # remove eos/sos in span
+                        if eos_syntax[key] in v:
+                            v.remove(eos_syntax[key])
+                        if sos_syntax[key] in v:
+                            v.remove(sos_syntax[key])
+                        v = ' '.join(v)
+                    else:
+                        pass  # v = v
+                    entry[key] = v
+
+                results.append(entry)
+
+        return results, field
+
+    def convert_turn_eval(self, turn, pv_turn, first_turn=False):
+        """
+        input: [all previous ubar, U_t, B_t, A_t] predict R_t
+        first turn: [U_t, B_t, A_t] predict R_t
+
+        regarding the context, all previous ubar is too slow, try the previous ubar
+        """
+        inputs = {}
+
+        context_list = []
+        prompt_id = None
+        if self.use_true_curr_bspn:
+            if self.use_true_curr_aspn:  # only predict resp
+                context_list = ['user', 'bspn', 'db', 'aspn']
+                prompt_id = self.sos_r_id
+            else:  # predicted aspn
+                context_list = ['user', 'bspn', 'db']
+                prompt_id = self.sos_a_id
+        else:  # predict bspn aspn resp. db are not predicted. this part tbd.
+            context_list = ['user']
+            prompt_id = self.sos_b_id
+
+        if first_turn:
+            context = []
+            for c in context_list:
+                context += turn[c]
+
+            inputs['src'] = [context]
+            inputs['labels'] = [context]
+        else:
+            context = []
+            for c in context_list:
+                context += turn[c]
+
+            if self.use_true_curr_bspn:
+                pv_context = pv_turn['labels'] + [
+                    pv_turn['aspn'] + pv_turn['resp']
+                ]
+            else:
+                pv_context = pv_turn['labels'] + [
+                    pv_turn['bspn'] + pv_turn['db'] + pv_turn['aspn']
+                    + pv_turn['resp']
+                ]
+
+            # prompt response, add sos_r
+            inputs['src'] = pv_context + [context]
+
+            if self.use_all_previous_context:
+                inputs['labels'] = pv_context + [
+                    context
+                ]  # use all previous ubar history
+            else:
+                inputs['labels'] = [context]  # use previous turn
+
+        return inputs, prompt_id
diff --git a/modelscope/preprocessors/space/fields/intent_field.py b/modelscope/preprocessors/space/fields/intent_field.py
new file mode 100644
index 00000000..0c8c909e
--- /dev/null
+++ b/modelscope/preprocessors/space/fields/intent_field.py
@@ -0,0 +1,1074 @@
+"""
+Intent Field class
+"""
+import glob
+import multiprocessing
+import os
+import random
+import re
+import time
+from collections import defaultdict
+from itertools import chain
+
+import json
+import numpy as np
+from tqdm import tqdm
+
+from modelscope.preprocessors.space.tokenizer import Tokenizer
+from modelscope.utils.nlp.space import ontology, utils
+from modelscope.utils.nlp.space.scores import hierarchical_set_score
+from modelscope.utils.nlp.space.utils import list2np
+
+
+class BPETextField(object):
+
+    pad_token = '[PAD]'
+    bos_token = '[BOS]'
+    eos_token = '[EOS]'
+    unk_token = '[UNK]'
+    mask_token = '[MASK]'
+    sos_u_token = '<sos_u>'
+    eos_u_token = '<eos_u>'
+    sos_b_token = '<sos_b>'
+    eos_b_token = '<eos_b>'
+    sos_db_token = '<sos_db>'
+    eos_db_token = '<eos_db>'
+    sos_a_token = '<sos_a>'
+    eos_a_token = '<eos_a>'
+    sos_r_token = '<sos_r>'
+    eos_r_token = '<eos_r>'
+
+    def __init__(self, model_dir, config):
+        self.score_matrixs = {}
+        self.prompt_num_for_understand =
config.BPETextField.prompt_num_for_understand + self.prompt_num_for_policy = config.BPETextField.prompt_num_for_policy + self.understand_tokens = ontology.get_understand_tokens( + self.prompt_num_for_understand) + self.policy_tokens = ontology.get_policy_tokens( + self.prompt_num_for_policy) + special_tokens = [ + self.pad_token, self.bos_token, self.eos_token, self.unk_token + ] + special_tokens.extend(self.add_sepcial_tokens()) + self.tokenizer = Tokenizer( + vocab_path=os.path.join(model_dir, 'vocab.txt'), + special_tokens=special_tokens, + tokenizer_type=config.BPETextField.tokenizer_type) + self.understand_ids = self.numericalize(self.understand_tokens) + self.policy_ids = self.numericalize(self.policy_tokens) + + self.tokenizer_type = config.BPETextField.tokenizer_type + self.filtered = config.BPETextField.filtered + self.max_len = config.BPETextField.max_len + self.min_utt_len = config.BPETextField.min_utt_len + self.max_utt_len = config.BPETextField.max_utt_len + self.min_ctx_turn = config.BPETextField.min_ctx_turn + self.max_ctx_turn = config.BPETextField.max_ctx_turn + self.policy = config.BPETextField.policy + self.generation = config.BPETextField.generation + self.with_mlm = config.Dataset.with_mlm + self.with_query_bow = config.BPETextField.with_query_bow + self.with_contrastive = config.Dataset.with_contrastive + self.num_process = config.Dataset.num_process + self.dynamic_score = config.Dataset.dynamic_score + self.abandon_label = config.Dataset.abandon_label + self.trigger_role = config.Dataset.trigger_role + self.trigger_data = config.Dataset.trigger_data.split( + ',') if config.Dataset.trigger_data else [] + + # data_paths = list(os.path.dirname(c) for c in sorted( + # glob.glob(hparams.data_dir + '/**/' + f'train.{hparams.tokenizer_type}.jsonl', recursive=True))) + # self.data_paths = self.filter_data_path(data_paths=data_paths) + # self.labeled_data_paths = [data_path for data_path in self.data_paths if 'UniDA' in data_path] + # self.unlabeled_data_paths = [data_path for data_path in self.data_paths if 'UnDial' in data_path] + # assert len(self.unlabeled_data_paths) + len(self.labeled_data_paths) == len(self.data_paths) + # assert len(self.labeled_data_paths) or len(self.unlabeled_data_paths), 'No dataset is loaded' + + @property + def vocab_size(self): + return self.tokenizer.vocab_size + + @property + def num_specials(self): + return len(self.tokenizer.special_tokens) + + @property + def pad_id(self): + return self.tokenizer.convert_tokens_to_ids([self.pad_token])[0] + + @property + def bos_id(self): + return self.tokenizer.convert_tokens_to_ids([self.bos_token])[0] + + @property + def eos_id(self): + return self.tokenizer.convert_tokens_to_ids([self.eos_token])[0] + + @property + def unk_id(self): + return self.tokenizer.convert_tokens_to_ids([self.unk_token])[0] + + @property + def mask_id(self): + return self.tokenizer.convert_tokens_to_ids([self.mask_token])[0] + + @property + def sos_u_id(self): + return self.tokenizer.convert_tokens_to_ids([self.sos_u_token])[0] + + @property + def eos_u_id(self): + return self.tokenizer.convert_tokens_to_ids([self.eos_u_token])[0] + + @property + def sos_b_id(self): + return self.tokenizer.convert_tokens_to_ids([self.sos_b_token])[0] + + @property + def eos_b_id(self): + return self.tokenizer.convert_tokens_to_ids([self.eos_b_token])[0] + + @property + def sos_db_id(self): + return self.tokenizer.convert_tokens_to_ids([self.sos_db_token])[0] + + @property + def eos_db_id(self): + return 
self.tokenizer.convert_tokens_to_ids([self.eos_db_token])[0]
+
+    @property
+    def sos_a_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_a_token])[0]
+
+    @property
+    def eos_a_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_a_token])[0]
+
+    @property
+    def sos_r_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.sos_r_token])[0]
+
+    @property
+    def eos_r_id(self):
+        return self.tokenizer.convert_tokens_to_ids([self.eos_r_token])[0]
+
+    @property
+    def bot_id(self):
+        """
+        Distinguishes between the two roles, user and bot.
+        0 and 1 are not vocabulary indices but dedicated role indices of size
+        2, matching the hyperparameter 'num_type_embeddings'.
+        """
+        return 0
+
+    @property
+    def user_id(self):
+        """
+        Distinguishes between the two roles, user and bot.
+        0 and 1 are not vocabulary indices but dedicated role indices of size
+        2, matching the hyperparameter 'num_type_embeddings'.
+        """
+        return 1
+
+    def add_sepcial_tokens(self):
+        prompt_tokens = self.understand_tokens + self.policy_tokens
+        return ontology.get_special_tokens(other_tokens=prompt_tokens)
+
+    def filter_data_path(self, data_paths):
+        if self.trigger_data:
+            filtered_data_paths = []
+            for data_path in data_paths:
+                for data_name in self.trigger_data:
+                    if data_path.endswith(f'/{data_name}'):
+                        filtered_data_paths.append(data_path)
+                        break
+        else:
+            filtered_data_paths = data_paths
+        return filtered_data_paths
+
+    def load_score_matrix(self, data_type, data_iter=None):
+        """
+        load score matrix for all labeled datasets
+        """
+        for data_path in self.labeled_data_paths:
+            file_index = os.path.join(
+                data_path, f'{data_type}.{self.tokenizer_type}.jsonl')
+            file = os.path.join(data_path, f'{data_type}.Score.npy')
+            if self.dynamic_score:
+                score_matrix = {}
+                print(f"Created 1 score cache dict for data in '{file_index}'")
+            else:
+                # TODO add post score matrix
+                assert os.path.exists(file), f"{file} doesn't exist"
+                print(f"Loading 1 score matrix from '{file}' ...")
+                fp = np.memmap(file, dtype='float32', mode='r')
+                assert len(fp.shape) == 1
+                num = int(np.sqrt(fp.shape[0]))
+                score_matrix = fp.reshape(num, num)
+                print(f"Loaded 1 score matrix for data in '{file_index}'")
+            self.score_matrixs[file_index] = score_matrix
+
+    def random_word(self, chars):
+        output_label = []
+        output_chars = []
+
+        for i, char in enumerate(chars):
+            # TODO delete this part to learn special tokens
+            if char in [
+                    self.sos_u_id, self.eos_u_id, self.sos_r_id, self.eos_r_id
+            ]:
+                output_chars.append(char)
+                output_label.append(self.pad_id)
+                continue
+
+            prob = random.random()
+            if prob < 0.15:
+                prob /= 0.15
+
+                # 80% randomly change token to mask token
+                if prob < 0.8:
+                    output_chars.append(self.mask_id)
+
+                # 10% randomly change token to random token
+                elif prob < 0.9:
+                    output_chars.append(
+                        random.randint(1, self.vocab_size
+                                       - 1))  # start from 1, to exclude pad_id
+
+                # 10% randomly change token to current token
+                else:
+                    output_chars.append(char)
+
+                output_label.append(char)
+
+            else:
+                output_chars.append(char)
+                output_label.append(self.pad_id)
+
+        return output_chars, output_label
+
+    def create_masked_lm_predictions(self, sample):
+        src = sample['src']
+        src_span_mask = sample['src_span_mask']
+        mlm_inputs = []
+        mlm_labels = []
+        for chars, chars_span_mask in zip(src, src_span_mask):
+            if sum(chars_span_mask):
+                mlm_input, mlm_label = [], []
+                for char, char_mask in zip(chars, chars_span_mask):
+                    if char_mask:
+                        mlm_input.append(self.mask_id)
+                        mlm_label.append(char)
+                    else:
+                        mlm_input.append(char)
+                        mlm_label.append(self.pad_id)
+            else:
+                mlm_input, mlm_label = self.random_word(chars)
+            mlm_inputs.append(mlm_input)
+            mlm_labels.append(mlm_label)
+
+        sample['mlm_inputs'] = mlm_inputs
+        sample['mlm_labels'] = mlm_labels
+        return sample
+
+    def create_span_masked_lm_predictions(self, sample):
+        src = sample['src']
+        src_span_mask = sample['src_span_mask']
+        mlm_inputs = []
+        mlm_labels = []
+        for chars, chars_span_mask in zip(src, src_span_mask):
+            mlm_input, mlm_label = [], []
+            for char, char_mask in zip(chars, chars_span_mask):
+                if char_mask:
+                    mlm_input.append(self.mask_id)
+                    mlm_label.append(char)
+                else:
+                    mlm_input.append(char)
+                    mlm_label.append(self.pad_id)
+            mlm_inputs.append(mlm_input)
+            mlm_labels.append(mlm_label)
+
+        sample['mlm_inputs'] = mlm_inputs
+        sample['mlm_labels'] = mlm_labels
+        return sample
+
+    def create_token_masked_lm_predictions(self, sample):
+        mlm_inputs = sample['mlm_inputs']
+        mlm_labels = sample['mlm_labels']
+
+        for i, span_mlm_label in enumerate(mlm_labels):
+            if not sum(span_mlm_label):
+                mlm_input, mlm_label = self.random_word(mlm_inputs[i])
+                mlm_inputs[i] = mlm_input
+                mlm_labels[i] = mlm_label
+
+        return sample
+
+    def numericalize(self, tokens):
+        """
+        only performs "convert_tokens_to_ids"; the input must already have been
+        tokenized into sub-words by "tokenizer.tokenize"
+        """
+        assert isinstance(tokens, list)
+        if len(tokens) == 0:
+            return []
+        element = tokens[0]
+        if isinstance(element, list):
+            return [self.numericalize(s) for s in tokens]
+        else:
+            return self.tokenizer.convert_tokens_to_ids(tokens)
+
+    def denumericalize(self, numbers):
+        """
+        first "convert_ids_to_tokens", then merge sub-words back into the
+        original words
+        """
+        assert isinstance(numbers, list)
+        if len(numbers) == 0:
+            return []
+        element = numbers[0]
+        if isinstance(element, list):
+            return [self.denumericalize(x) for x in numbers]
+        else:
+            return self.tokenizer.decode(
+                numbers,
+                ignore_tokens=[self.bos_token, self.eos_token, self.pad_token])
+
+    def save_examples(self, examples, filename):
+        start = time.time()
+        if filename.endswith('npy'):
+            print(f"Saving 1 object to '{filename}' ...")
+            assert len(
+                examples.shape) == 2 and examples.shape[0] == examples.shape[1]
+            num = examples.shape[0]
+            fp = np.memmap(
+                filename, dtype='float32', mode='w+', shape=(num, num))
+            fp[:] = examples[:]
+            fp.flush()
+            elapsed = time.time() - start
+            print(f'Saved 1 object (elapsed {elapsed:.2f}s)')
+        elif filename.endswith('jsonl'):
+            print(f"Saving examples to '{filename}' ...")
+            with open(filename, 'w', encoding='utf-8') as fp:
+                for ex in examples:
+                    fp.write(json.dumps(ex) + '\n')
+            elapsed = time.time() - start
+            print(f'Saved {len(examples)} examples (elapsed {elapsed:.2f}s)')
+        else:
+            print(f"Saving examples to '{filename}' ...")
+            raise ValueError(f'Unsupported file format: {filename}')
+
+    def load_examples(self, filename):
+        start = time.time()
+        if filename.endswith('npy'):
+            print(f"Loading 1 object from '{filename}' ...")
+            fp = np.memmap(filename, dtype='float32', mode='r')
+            assert len(fp.shape) == 1
+            num = int(np.sqrt(fp.shape[0]))
+            examples = fp.reshape(num, num)
+            elapsed = time.time() - start
+            print(f'Loaded 1 object (elapsed {elapsed:.2f}s)')
+        else:
+            print(f"Loading examples from '{filename}' ...")
+            with open(filename, 'r', encoding='utf-8') as fp:
+                examples = list(map(lambda s: json.loads(s.strip()), fp))
+            elapsed = time.time() - start
+            print(f'Loaded {len(examples)} examples (elapsed {elapsed:.2f}s)')
+        return examples
+
+    def utt_filter_pred(self, utt):
+        return self.min_utt_len <= len(utt) \
+            and (not self.filtered or len(utt) <= self.max_utt_len)
+
+    def utts_filter_pred(self, utts):
+        return self.min_ctx_turn <= len(utts) \
+ and (not self.filtered or len(utts) <= self.max_ctx_turn) + + def get_token_pos(self, tok_list, value_label): + find_pos = [] + found = False + label_list = [ + item + for item in map(str.strip, re.split('(\\W+)', value_label.lower())) + if len(item) > 0 + ] + len_label = len(label_list) + for i in range(len(tok_list) + 1 - len_label): + if tok_list[i:i + len_label] == label_list: + find_pos.append((i, i + len_label)) # start, exclusive_end + found = True + return found, find_pos + + def build_score_matrix(self, examples): + """ + build symmetric score matrix + """ + assert self.num_process == 1 + print(f'Building score matrix from examples ...') + num = len(examples) + score_matrix = np.eye( + num, num, dtype='float32' + ) # in case of empty label of self, resulting in score 0. + + for i in tqdm(range(num)): + for j in range(i): + # TODO change the score method + score = hierarchical_set_score( + frame1=examples[i]['label'], frame2=examples[j]['label']) + score_matrix[i][j] = score + score_matrix[j][i] = score + + print(f'Built score matrix') + return score_matrix + + def build_score_matrix_on_the_fly(self, + ids, + labels, + data_file, + is_post=False): + """ + build symmetric score matrix on the fly + @is_post: True for resp label of sample i and j, False for query label of sample i and j + """ + num = len(labels) + tag = 'r' if is_post else 'q' + assert len(ids) == len(labels) + score_matrix = np.eye( + num, num, dtype='float32' + ) # in case of empty label of self, resulting in score 0. + + for i in range(num): + for j in range(i): + score = self.score_matrixs[data_file].get( + f'{ids[i]}-{ids[j]}-{tag}', None) + if score is None: + score = self.score_matrixs[data_file].get( + f'{ids[j]}-{ids[i]}-{tag}', None) + if score is None: + # TODO change the score method + score = hierarchical_set_score( + frame1=labels[i], frame2=labels[j]) + self.score_matrixs[data_file][ + f'{ids[i]}-{ids[j]}-{tag}'] = score + score_matrix[i][j] = score + score_matrix[j][i] = score + + return score_matrix + + def build_score_matrix_func(self, examples, start, exclusive_end): + """ + build sub score matrix + """ + num = len(examples) + process_id = os.getpid() + description = f'PID: {process_id} Start: {start} End: {exclusive_end}' + print( + f'PID-{process_id}: Building {start} to {exclusive_end} lines score matrix from examples ...' 
+ ) + score_matrix = np.zeros((exclusive_end - start, num), dtype='float32') + + for abs_i, i in enumerate( + tqdm(range(start, exclusive_end), desc=description)): + for j in range(num): + # TODO change the score method + score = hierarchical_set_score( + frame1=examples[i]['label'], frame2=examples[j]['label']) + score_matrix[abs_i][j] = score + + print( + f'PID-{process_id}: Built {start} to {exclusive_end} lines score matrix' + ) + return {'start': start, 'score_matrix': score_matrix} + + def build_score_matrix_multiprocessing(self, examples): + """ + build score matrix + """ + assert self.num_process >= 2 and multiprocessing.cpu_count() >= 2 + print(f'Building score matrix from examples ...') + results = [] + num = len(examples) + sub_num, res_num = num // self.num_process, num % self.num_process + patches = [sub_num] * (self.num_process - 1) + [sub_num + res_num] + + start = 0 + pool = multiprocessing.Pool(processes=self.num_process) + for patch in patches: + exclusive_end = start + patch + results.append( + pool.apply_async(self.build_score_matrix_func, + (examples, start, exclusive_end))) + start = exclusive_end + pool.close() + pool.join() + + sub_score_matrixs = [result.get() for result in results] + sub_score_matrixs = sorted( + sub_score_matrixs, key=lambda sub: sub['start']) + sub_score_matrixs = [ + sub_score_matrix['score_matrix'] + for sub_score_matrix in sub_score_matrixs + ] + score_matrix = np.concatenate(sub_score_matrixs, axis=0) + assert score_matrix.shape == (num, num) + np.fill_diagonal( + score_matrix, + 1.) # in case of empty label of self, resulting in score 0. + + print(f'Built score matrix') + return score_matrix + + def extract_span_texts(self, text, label): + span_texts = [] + for domain, frame in label.items(): + for act, slot_values in frame.items(): + for slot, values in slot_values.items(): + for value in values: + if value['span']: + span_texts.append( + text[value['span'][0]:value['span'][1]]) + elif str(value['value']).strip().lower() in text.strip( + ).lower(): + span_texts.append(str(value['value'])) + return span_texts + + def fix_label(self, label): + for domain, frame in label.items(): + if not frame: + return {} + for act, slot_values in frame.items(): + if act == 'DEFAULT_INTENT' and not slot_values: + return {} + return label + + def build_examples_multi_turn(self, data_file, data_type='train'): + print(f"Reading examples from '{data_file}' ...") + examples = [] + ignored = 0 + + with open(data_file, 'r', encoding='utf-8') as f: + input_data = json.load(f) + for dialog_id in tqdm(input_data): + turns = input_data[dialog_id]['turns'] + history, history_role, history_span_mask, history_label = [], [], [], [] + for t, turn in enumerate(turns): + label = turn['label'] + role = turn['role'] + text = turn['text'] + utterance, span_mask = [], [] + + token_list = [ + tok for tok in map(str.strip, + re.split('(\W+)', text.lower())) + if len(tok) > 0 + ] + span_list = np.zeros(len(token_list), dtype=np.int32) + span_texts = self.extract_span_texts( + text=text, label=label) + + for span_text in span_texts: + found, find_pos = self.get_token_pos( + tok_list=token_list, value_label=span_text) + if found: + for start, exclusive_end in find_pos: + span_list[start:exclusive_end] = 1 + + token_list = [ + self.tokenizer.tokenize(token) for token in token_list + ] + span_list = [[tag] * len(token_list[i]) + for i, tag in enumerate(span_list)] + for sub_tokens in token_list: + utterance.extend(sub_tokens) + for sub_spans in span_list: + span_mask.extend(sub_spans) 
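+                    # span_mask now carries one 0/1 flag per BPE sub-token,
+                    # aligned one-to-one with utterance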
+ assert len(utterance) == len(span_mask) + + history.append(utterance) + history_role.append(role) + history_span_mask.append(span_mask) + history_label.append(self.fix_label(label)) + + if ( + (self.utts_filter_pred(history[:-1]) + and all(map(self.utt_filter_pred, history))) + or data_type == 'test' + ) and role in self.trigger_role and t: # TODO consider test + src = [ + s[-self.max_utt_len:] + for s in history[:-1][-self.max_ctx_turn:] + ] + src_span_mask = [ + s[-self.max_utt_len:] for s in + history_span_mask[:-1][-self.max_ctx_turn:] + ] + roles = [ + role + for role in history_role[:-1][-self.max_ctx_turn:] + ] + src = [[self.sos_u_id] + self.numericalize(s) + + [self.eos_u_id] + if roles[i] == 'user' else [self.sos_r_id] + + self.numericalize(s) + [self.eos_r_id] + for i, s in enumerate(src)] + src_span_mask = [[0] + list(map(int, s)) + [0] + for s in src_span_mask] + + tgt = [self.sos_r_id] + self.numericalize( + history[-1]) + [self.eos_r_id] + if data_type != 'test': + tgt = tgt[:self.max_utt_len + 2] + + ex = { + 'dialog_id': dialog_id, + 'turn_id': turn['turn_id'], + 'src': src, + 'src_span_mask': src_span_mask, + 'tgt': tgt, + 'query_label': history_label[-2], + 'resp_label': history_label[-1], + 'extra_info': turn.get('extra_info', '') + } + examples.append(ex) + else: + ignored += 1 + + # add span mlm inputs and span mlm labels in advance + if self.with_mlm: + examples = [ + self.create_span_masked_lm_predictions(example) + for example in examples + ] + + # add absolute id of the dataset for indexing scores in its score matrix + for i, example in enumerate(examples): + example['id'] = i + + print( + f'Built {len(examples)} {data_type.upper()} examples ({ignored} filtered)' + ) + return examples + + def preprocessor(self, text_list): + role = 'user' + examples = [] + + for text in text_list: + history, history_role, history_span_mask = [], [], [] + utterance, span_mask = [], [] + token_list = [ + tok for tok in map(str.strip, re.split('(\W+)', text.lower())) + if len(tok) > 0 + ] + span_list = np.zeros(len(token_list), dtype=np.int32) + token_list = [ + self.tokenizer.tokenize(token) for token in token_list + ] + span_list = [[tag] * len(token_list[i]) + for i, tag in enumerate(span_list)] + + for sub_tokens in token_list: + utterance.extend(sub_tokens) + for sub_spans in span_list: + span_mask.extend(sub_spans) + assert len(utterance) == len(span_mask) + + history.append(utterance) + history_role.append(role) + history_span_mask.append(span_mask) + + src = [s[-self.max_utt_len:] for s in history[-self.max_ctx_turn:]] + src_span_mask = [ + s[-self.max_utt_len:] + for s in history_span_mask[-self.max_ctx_turn:] + ] + roles = [role for role in history_role[-self.max_ctx_turn:]] + src = [[self.sos_u_id] + self.numericalize(s) + + [self.eos_u_id] if roles[i] == 'user' else [self.sos_r_id] + + self.numericalize(s) + [self.eos_r_id] + for i, s in enumerate(src)] + src_span_mask = [[0] + list(map(int, s)) + [0] + for s in src_span_mask] + + ex = { + 'dialog_id': 'inference', + 'turn_id': 0, + 'role': role, + 'src': src, + 'src_span_mask': src_span_mask, + 'query_label': { + 'DEFAULT_DOMAIN': { + 'card_arrival': {} + } + }, + 'extra_info': { + 'intent_label': -1 + } + } + examples.append(ex) + # add span mlm inputs and span mlm labels in advance + if self.with_mlm: + examples = [ + self.create_span_masked_lm_predictions(example) + for example in examples + ] + + # add absolute id of the dataset for indexing scores in its score matrix + for i, example in enumerate(examples): + 
example['id'] = i + + return examples + + def build_examples_single_turn(self, data_file, data_type='train'): + print(f"Reading examples from '{data_file}' ...") + examples = [] + ignored = 0 + + with open(data_file, 'r', encoding='utf-8') as f: + input_data = json.load(f) + for dialog_id in tqdm(input_data): + turns = input_data[dialog_id]['turns'] + history, history_role, history_span_mask = [], [], [] + for turn in turns: + label = turn['label'] + role = turn['role'] + text = turn['text'] + utterance, span_mask = [], [] + + token_list = [ + tok for tok in map(str.strip, + re.split('(\W+)', text.lower())) + if len(tok) > 0 + ] + span_list = np.zeros(len(token_list), dtype=np.int32) + span_texts = self.extract_span_texts( + text=text, label=label) + + for span_text in span_texts: + found, find_pos = self.get_token_pos( + tok_list=token_list, value_label=span_text) + if found: + for start, exclusive_end in find_pos: + span_list[start:exclusive_end] = 1 + + token_list = [ + self.tokenizer.tokenize(token) for token in token_list + ] + span_list = [[tag] * len(token_list[i]) + for i, tag in enumerate(span_list)] + for sub_tokens in token_list: + utterance.extend(sub_tokens) + for sub_spans in span_list: + span_mask.extend(sub_spans) + assert len(utterance) == len(span_mask) + + history.append(utterance) + history_role.append(role) + history_span_mask.append(span_mask) + + if ((self.utts_filter_pred(history) + and all(map(self.utt_filter_pred, history))) + or data_type == 'test' + ) and role in self.trigger_role: # TODO consider test + src = [ + s[-self.max_utt_len:] + for s in history[-self.max_ctx_turn:] + ] + src_span_mask = [ + s[-self.max_utt_len:] + for s in history_span_mask[-self.max_ctx_turn:] + ] + roles = [ + role for role in history_role[-self.max_ctx_turn:] + ] + src = [[self.sos_u_id] + self.numericalize(s) + + [self.eos_u_id] + if roles[i] == 'user' else [self.sos_r_id] + + self.numericalize(s) + [self.eos_r_id] + for i, s in enumerate(src)] + src_span_mask = [[0] + list(map(int, s)) + [0] + for s in src_span_mask] + + ex = { + 'dialog_id': dialog_id, + 'turn_id': turn['turn_id'], + 'role': role, + 'src': src, + 'src_span_mask': src_span_mask, + 'query_label': self.fix_label(label), + 'extra_info': turn.get('extra_info', '') + } + examples.append(ex) + else: + ignored += 1 + + # add span mlm inputs and span mlm labels in advance + if self.with_mlm: + examples = [ + self.create_span_masked_lm_predictions(example) + for example in examples + ] + + # add absolute id of the dataset for indexing scores in its score matrix + for i, example in enumerate(examples): + example['id'] = i + + print( + f'Built {len(examples)} {data_type.upper()} examples ({ignored} filtered)' + ) + return examples + + def collate_fn_multi_turn(self, samples): + batch_size = len(samples) + batch = {} + + src = [sp['src'] for sp in samples] + query_token, src_token, src_pos, src_turn, src_role = [], [], [], [], [] + for utts in src: + query_token.append(utts[-1]) + utt_lens = [len(utt) for utt in utts] + + # Token ids + src_token.append(list(chain(*utts))[-self.max_len:]) + + # Position ids + pos = [list(range(l)) for l in utt_lens] + src_pos.append(list(chain(*pos))[-self.max_len:]) + + # Turn ids + turn = [[len(utts) - i] * l for i, l in enumerate(utt_lens)] + src_turn.append(list(chain(*turn))[-self.max_len:]) + + # Role ids + role = [ + [self.bot_id if (len(utts) - i) % 2 == 0 else self.user_id] * l + for i, l in enumerate(utt_lens) + ] + src_role.append(list(chain(*role))[-self.max_len:]) + + # 
The src and tgt sequences are padded separately so the first decoded token stays aligned.
+        src_token = list2np(src_token, padding=self.pad_id)
+        src_pos = list2np(src_pos, padding=self.pad_id)
+        src_turn = list2np(src_turn, padding=self.pad_id)
+        src_role = list2np(src_role, padding=self.pad_id)
+        batch['src_token'] = src_token
+        batch['src_pos'] = src_pos
+        batch['src_type'] = src_role
+        batch['src_turn'] = src_turn
+        batch['src_mask'] = (src_token != self.pad_id).astype('int64')
+
+        if self.with_query_bow:
+            query_token = list2np(query_token, padding=self.pad_id)
+            batch['query_token'] = query_token
+            batch['query_mask'] = (query_token != self.pad_id).astype('int64')
+
+        if self.with_mlm:
+            mlm_token, mlm_label = [], []
+            raw_mlm_input = [sp['mlm_inputs'] for sp in samples]
+            raw_mlm_label = [sp['mlm_labels'] for sp in samples]
+            for inputs in raw_mlm_input:
+                mlm_token.append(list(chain(*inputs))[-self.max_len:])
+            for labels in raw_mlm_label:
+                mlm_label.append(list(chain(*labels))[-self.max_len:])
+
+            mlm_token = list2np(mlm_token, padding=self.pad_id)
+            mlm_label = list2np(mlm_label, padding=self.pad_id)
+            batch['mlm_token'] = mlm_token
+            batch['mlm_label'] = mlm_label
+            batch['mlm_mask'] = (mlm_label != self.pad_id).astype('int64')
+
+        if self.dynamic_score and self.with_contrastive and not self.abandon_label:
+            query_labels = [sp['query_label'] for sp in samples]
+            batch['query_labels'] = query_labels
+            if self.trigger_role == 'system':
+                resp_labels = [sp['resp_label'] for sp in samples]
+                batch['resp_labels'] = resp_labels
+            batch['label_ids'] = np.arange(
+                batch_size)  # to identify labels for each GPU in multi-GPU runs
+
+        if self.understand_ids:
+            understand = [self.understand_ids for _ in samples]
+            understand_token = np.array(understand).astype('int64')
+            batch['understand_token'] = understand_token
+            batch['understand_mask'] = (understand_token !=
+                                        self.pad_id).astype('int64')
+
+        if self.policy_ids and self.policy:
+            policy = [self.policy_ids for _ in samples]
+            policy_token = np.array(policy).astype('int64')
+            batch['policy_token'] = policy_token
+            batch['policy_mask'] = (policy_token !=
+                                    self.pad_id).astype('int64')
+
+        if 'tgt' in samples[0]:
+            tgt = [sp['tgt'] for sp in samples]
+
+            # Token ids & Label ids
+            tgt_token = list2np(tgt, padding=self.pad_id)
+
+            # Position ids
+            tgt_pos = np.zeros_like(tgt_token)
+            tgt_pos[:] = np.arange(tgt_token.shape[1], dtype=tgt_token.dtype)
+
+            # Turn ids
+            tgt_turn = np.zeros_like(tgt_token)
+
+            # Role ids
+            tgt_role = np.full_like(tgt_token, self.bot_id)
+
+            batch['tgt_token'] = tgt_token
+            batch['tgt_pos'] = tgt_pos
+            batch['tgt_type'] = tgt_role
+            batch['tgt_turn'] = tgt_turn
+            batch['tgt_mask'] = (tgt_token != self.pad_id).astype('int64')
+
+        if 'id' in samples[0]:
+            ids = [sp['id'] for sp in samples]
+            ids = np.array(ids).astype('int64')
+            batch['ids'] = ids
+
+        return batch, batch_size
+
+
+class IntentBPETextField(BPETextField):
+
+    def __init__(self, model_dir, config):
+        super(IntentBPETextField, self).__init__(model_dir, config)
+
+    def retrieve_examples(self,
+                          dataset,
+                          labels,
+                          inds,
+                          task,
+                          num=None,
+                          cache=None):
+        assert task == 'intent', 'Example-driven may only be used with intent prediction'
+        if num is None and labels is not None:
+            num = len(labels) * 2
+
+        # Populate cache
+        if cache is None:
+            cache = defaultdict(list)
+            for i, example in enumerate(dataset):
+                assert i == example['id']
+                cache[example['extra_info']['intent_label']].append(i)
+
+        # One example for each label
+        example_inds = []
+        for l in set(labels.tolist()):
+            if l == -1:
+                continue
+
+            ind = random.choice(cache[l])
+            retries = 0
+            while ind in inds.tolist() or type(ind) is not int:
+                ind = random.choice(cache[l])
+                retries += 1
+                if retries > len(dataset):
+                    break
+
+            example_inds.append(ind)
+
+        # Sample randomly until we hit batch size
+        while len(example_inds) < min(len(dataset), num):
+            ind = random.randint(0, len(dataset) - 1)
+            if ind not in example_inds and ind not in inds.tolist():
+                example_inds.append(ind)
+
+        # Create examples
+        example_batch = {}
+        examples = [dataset[i] for i in example_inds]
+        examples, _ = self.collate_fn_multi_turn(examples)
+        example_batch['example_src_token'] = examples['src_token']
+        example_batch['example_src_pos'] = examples['src_pos']
+        example_batch['example_src_type'] = examples['src_type']
+        example_batch['example_src_turn'] = examples['src_turn']
+        example_batch['example_src_mask'] = examples['src_mask']
+        example_batch['example_tgt_token'] = examples['tgt_token']
+        example_batch['example_tgt_mask'] = examples['tgt_mask']
+        example_batch['example_intent'] = examples['intent_label']
+
+        return example_batch
+
+    def collate_fn_multi_turn(self, samples):
+        batch_size = len(samples)
+        batch = {}
+
+        cur_roles = [sp['role'] for sp in samples]
+        src = [sp['src'] for sp in samples]
+        src_token, src_pos, src_turn, src_role = [], [], [], []
+        for utts, cur_role in zip(src, cur_roles):
+            utt_lens = [len(utt) for utt in utts]
+
+            # Token ids
+            src_token.append(list(chain(*utts))[-self.max_len:])
+
+            # Position ids
+            pos = [list(range(l)) for l in utt_lens]
+            src_pos.append(list(chain(*pos))[-self.max_len:])
+
+            # Turn ids
+            turn = [[len(utts) - i] * l for i, l in enumerate(utt_lens)]
+            src_turn.append(list(chain(*turn))[-self.max_len:])
+
+            # Role ids
+            if cur_role == 'user':
+                role = [[
+                    self.bot_id if (len(utts) - i) % 2 == 0 else self.user_id
+                ] * l for i, l in enumerate(utt_lens)]
+            else:
+                role = [[
+                    self.user_id if (len(utts) - i) % 2 == 0 else self.bot_id
+                ] * l for i, l in enumerate(utt_lens)]
+            src_role.append(list(chain(*role))[-self.max_len:])
+
+        # The src and tgt sequences are padded separately so the first decoded
+        # token stays aligned.
+        src_token = list2np(src_token, padding=self.pad_id)
+        src_pos = list2np(src_pos, padding=self.pad_id)
+        src_turn = list2np(src_turn, padding=self.pad_id)
+        src_role = list2np(src_role, padding=self.pad_id)
+        batch['src_token'] = src_token
+        batch['src_pos'] = src_pos
+        batch['src_type'] = src_role
+        batch['src_turn'] = src_turn
+        batch['src_mask'] = (src_token != self.pad_id).astype(
+            'int64')  # input mask
+
+        if self.with_mlm:
+            mlm_token, mlm_label = [], []
+            raw_mlm_input = [sp['mlm_inputs'] for sp in samples]
+            raw_mlm_label = [sp['mlm_labels'] for sp in samples]
+            for inputs in raw_mlm_input:
+                mlm_token.append(list(chain(*inputs))[-self.max_len:])
+            for labels in raw_mlm_label:
+                mlm_label.append(list(chain(*labels))[-self.max_len:])
+
+            mlm_token = list2np(mlm_token, padding=self.pad_id)
+            mlm_label = list2np(mlm_label, padding=self.pad_id)
+            batch['mlm_token'] = mlm_token
+            batch['mlm_label'] = mlm_label
+            batch['mlm_mask'] = (mlm_label != self.pad_id).astype(
+                'int64')  # label mask
+
+        if self.understand_ids:
+            tgt = [self.understand_ids for _ in samples]
+            tgt_token = np.array(tgt).astype('int64')
+            batch['tgt_token'] = tgt_token
+            batch['tgt_mask'] = (tgt_token != self.pad_id).astype(
+                'int64')  # input mask
+
+        if 'id' in samples[0]:
+            ids = [sp['id'] for sp in samples]
+            ids = np.array(ids).astype('int64')
+            batch['ids'] = ids
+
+        if self.dynamic_score and self.with_contrastive:
+            query_labels = [sp['query_label'] for sp in samples]
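+            # query_labels stay as raw label dicts here; similarity scores
+            # between them are computed later (e.g. via hierarchical_set_score
+            # above), while label_ids below simply index the samples so each
+            # GPU can recover its own labels in multi-GPU contrastive training.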
+            batch['query_labels'] = query_labels
+            batch['label_ids'] = np.arange(batch_size)
+
+        if 'intent_label' in samples[0]['extra_info']:
+            intent_label = [
+                sample['extra_info']['intent_label'] for sample in samples
+            ]
+            intent_label = np.array(intent_label).astype('int64')
+            batch['intent_label'] = intent_label
+
+        return batch, batch_size
diff --git a/modelscope/preprocessors/space/tokenizer.py b/modelscope/preprocessors/space/tokenizer.py
new file mode 100644
index 00000000..fe64493e
--- /dev/null
+++ b/modelscope/preprocessors/space/tokenizer.py
@@ -0,0 +1,665 @@
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+import collections
+import logging
+import os
+import sys
+import unicodedata
+
+import json
+import regex as re
+
+
+def clean_string(string):
+    replace_mp = {
+        ' - ': '-',
+        " ' ": "'",
+        " n't": "n't",
+        " 'm": "'m",
+        ' do not': " don't",
+        " 's": "'s",
+        " 've": "'ve",
+        " 're": "'re"
+    }
+    for k, v in replace_mp.items():
+        string = string.replace(k, v)
+    return string
+
+
+class Tokenizer(object):
+
+    def __init__(self, vocab_path, special_tokens=[], tokenizer_type='Bert'):
+        self.tokenizer_type = tokenizer_type
+        if tokenizer_type == 'Bert':
+            self.spec_convert_dict = {
+                '[BOS]': '[unused0]',
+                '[EOS]': '[unused1]'
+            }
+            for token in special_tokens:
+                if token not in self.spec_convert_dict and token not in [
+                        '[PAD]', '[UNK]'
+                ]:
+                    self.spec_convert_dict[
+                        token] = f'[unused{len(self.spec_convert_dict)}]'
+            self.spec_revert_dict = {
+                v: k
+                for k, v in self.spec_convert_dict.items()
+            }
+            special_tokens = [
+                self.spec_convert_dict.get(tok, tok) for tok in special_tokens
+            ]
+            self.special_tokens = ('[UNK]', '[SEP]', '[PAD]', '[CLS]',
+                                   '[MASK]')
+            self.special_tokens += tuple(x for x in special_tokens
+                                         if x not in self.special_tokens)
+
+            self._tokenizer = BertTokenizer(
+                vocab_path, never_split=self.special_tokens)
+            for tok in self.special_tokens:
+                # Special tokens must already exist in the vocabulary;
+                # registering them here makes each one occupy a whole slot, so
+                # it is never split into subwords. Tokens missing from the
+                # vocabulary can be mapped onto the vocab's [unused] symbols
+                # via spec_convert_dict.
+                assert tok in self._tokenizer.vocab, f"special token '{tok}' is not in the vocabulary"
+            self.vocab_size = len(self._tokenizer.vocab)
+        elif tokenizer_type == 'GPT2':
+            self.spec_convert_dict = {'[UNK]': ''}
+            self.spec_revert_dict = {
+                v: k
+                for k, v in self.spec_convert_dict.items()
+            }
+            special_tokens = [
+                tok for tok in special_tokens
+                if tok not in self.spec_convert_dict
+            ]
+            vocab_file = os.path.join(vocab_path, 'vocab.json')
+            merges_file = os.path.join(vocab_path, 'merges.txt')
+            self._tokenizer = GPT2Tokenizer(
+                vocab_file, merges_file, special_tokens=special_tokens)
+            self.num_specials = len(special_tokens)
+            self.vocab_size = len(self._tokenizer)
+        else:
+            raise ValueError(f'unsupported tokenizer_type: {tokenizer_type}')
+
+    def tokenize(self, text):
+        return self._tokenizer.tokenize(text)
+
+    def convert_tokens_to_ids(self, tokens):
+        if self.tokenizer_type == 'Bert':
+            tokens = [self.spec_convert_dict.get(tok, tok) for tok in tokens]
+            ids = self._tokenizer.convert_tokens_to_ids(tokens)
+            return ids
+        else:
+            tokens = [self.spec_convert_dict.get(tok, tok) for tok in tokens]
+            ids = self._tokenizer.convert_tokens_to_ids(tokens)
+            ids = [(i + self.num_specials) % self.vocab_size for i in ids]
+            return ids
+
+    def convert_ids_to_tokens(self, ids):
+        if self.tokenizer_type == 'Bert':
+            tokens = self._tokenizer.convert_ids_to_tokens(ids)
+            tokens = [self.spec_revert_dict.get(tok, tok) for tok in tokens]
+            return tokens
+        else:
+            ids = [(i - self.num_specials) % self.vocab_size for i 
in ids] + tokens = self._tokenizer.convert_ids_to_tokens(ids) + tokens = [self.spec_revert_dict.get(tok, tok) for tok in tokens] + return tokens + + def decode(self, ids, ignore_tokens=[]): + tokens = self.convert_ids_to_tokens(ids) + if len(ignore_tokens) > 0: + ignore_tokens = set(ignore_tokens) + tokens = [tok for tok in tokens if tok not in ignore_tokens] + if self.tokenizer_type == 'Bert': + string = ' '.join(tokens).replace(' ##', '') + else: + string = ''.join(tokens) + string = bytearray([ + self._tokenizer.byte_decoder[c] for c in string + ]).decode('utf-8') + string = clean_string(string) + return string + + +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" + +logger = logging.getLogger(__name__) + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with open(vocab_file, 'r', encoding='utf-8') as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BertTokenizer(object): + """Runs end-to-end tokenization: punctuation splitting + wordpiece""" + + def __init__(self, + vocab_file, + do_lower_case=True, + max_len=None, + do_basic_tokenize=True, + never_split=('[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]')): + """Constructs a BertTokenizer. + + Args: + vocab_file: Path to a one-wordpiece-per-line vocabulary file + do_lower_case: Whether to lower case the input + Only has an effect when do_wordpiece_only=False + do_basic_tokenize: Whether to do basic tokenization before wordpiece. + max_len: An artificial maximum length to truncate tokenized sequences to; + Effective maximum length is always the minimum of this + value (if specified) and the underlying BERT model's + sequence length. + never_split: List of tokens which will never be split during tokenization. + Only has an effect when do_wordpiece_only=False + """ + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. 
To load the vocabulary from a Google pretrained " + 'model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`' + .format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict([ + (ids, tok) for tok, ids in self.vocab.items() + ]) + self.do_basic_tokenize = do_basic_tokenize + if do_basic_tokenize: + self.basic_tokenizer = BasicTokenizer( + do_lower_case=do_lower_case, never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + def tokenize(self, text): + split_tokens = [] + if self.do_basic_tokenize: + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + else: + split_tokens = self.wordpiece_tokenizer.tokenize(text) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + """Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + logger.warning( + 'Token indices sequence length is longer than the specified maximum ' + ' sequence length for this BERT model ({} > {}). Running this' + ' sequence through BERT will result in indexing errors'.format( + len(ids), self.max_len)) + return ids + + def convert_ids_to_tokens(self, ids): + """Converts a sequence of ids in wordpiece tokens using the vocab.""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return tokens + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, + do_lower_case=True, + never_split=('[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]')): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
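+        # For example, "ab你好" becomes "ab 你  好 " here, so the whitespace
+        # tokenization below yields ['ab', '你', '好'], i.e. each CJK
+        # character ends up as its own token.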
+        text = self._tokenize_chinese_chars(text)
+        orig_tokens = whitespace_tokenize(text)
+        split_tokens = []
+        for token in orig_tokens:
+            if self.do_lower_case and token not in self.never_split:
+                token = token.lower()
+                token = self._run_strip_accents(token)
+            split_tokens.extend(self._run_split_on_punc(token))
+
+        output_tokens = whitespace_tokenize(' '.join(split_tokens))
+        return output_tokens
+
+    def _run_strip_accents(self, text):
+        """Strips accents from a piece of text."""
+        text = unicodedata.normalize('NFD', text)
+        output = []
+        for char in text:
+            cat = unicodedata.category(char)
+            if cat == 'Mn':
+                continue
+            output.append(char)
+        return ''.join(output)
+
+    def _run_split_on_punc(self, text):
+        """Splits punctuation on a piece of text."""
+        if text in self.never_split:
+            return [text]
+        chars = list(text)
+        i = 0
+        start_new_word = True
+        output = []
+        while i < len(chars):
+            char = chars[i]
+            if _is_punctuation(char):
+                output.append([char])
+                start_new_word = True
+            else:
+                if start_new_word:
+                    output.append([])
+                start_new_word = False
+                output[-1].append(char)
+            i += 1
+
+        return [''.join(x) for x in output]
+
+    def _tokenize_chinese_chars(self, text):
+        """Adds whitespace around any CJK character."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if self._is_chinese_char(cp):
+                output.append(' ')
+                output.append(char)
+                output.append(' ')
+            else:
+                output.append(char)
+        return ''.join(output)
+
+    def _is_chinese_char(self, cp):
+        """Checks whether CP is the codepoint of a CJK character."""
+        # This defines a "chinese character" as anything in the CJK Unicode block:
+        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+        #
+        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+        # despite its name. The modern Korean Hangul alphabet is a different block,
+        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+        # space-separated words, so they are not treated specially and handled
+        # like all of the other languages.
+        if ((cp >= 0x4E00 and cp <= 0x9FFF) or (cp >= 0x3400 and cp <= 0x4DBF)
+                or (cp >= 0x20000 and cp <= 0x2A6DF)
+                or (cp >= 0x2A700 and cp <= 0x2B73F)
+                or (cp >= 0x2B740 and cp <= 0x2B81F)
+                or (cp >= 0x2B820 and cp <= 0x2CEAF)
+                or (cp >= 0xF900 and cp <= 0xFAFF)
+                or (cp >= 0x2F800 and cp <= 0x2FA1F)):
+            return True
+
+        return False
+
+    def _clean_text(self, text):
+        """Performs invalid character removal and whitespace cleanup on text."""
+        output = []
+        for char in text:
+            cp = ord(char)
+            if cp == 0 or cp == 0xfffd or _is_control(char):
+                continue
+            if _is_whitespace(char):
+                output.append(' ')
+            else:
+                output.append(char)
+        return ''.join(output)
+
+
+class WordpieceTokenizer(object):
+    """Runs WordPiece tokenization."""
+
+    def __init__(self, vocab, unk_token='[UNK]', max_input_chars_per_word=100):
+        self.vocab = vocab
+        self.unk_token = unk_token
+        self.max_input_chars_per_word = max_input_chars_per_word
+
+    def tokenize(self, text):
+        """Tokenizes a piece of text into its word pieces.
+
+        This uses a greedy longest-match-first algorithm to perform tokenization
+        using the given vocabulary.
+
+        For example:
+            input = "unaffable"
+            output = ["un", "##aff", "##able"]
+
+        Args:
+            text: A single token or whitespace separated tokens. This should have
+                already been passed through `BasicTokenizer`.
+
+        Returns:
+            A list of wordpiece tokens.
+        """
+
+        output_tokens = []
+        for token in whitespace_tokenize(text):
+            chars = list(token)
+            if len(chars) > self.max_input_chars_per_word:
+                output_tokens.append(self.unk_token)
+                continue
+
+            is_bad = False
+            start = 0
+            sub_tokens = []
+            while start < len(chars):
+                end = len(chars)
+                cur_substr = None
+                while start < end:
+                    substr = ''.join(chars[start:end])
+                    if start > 0:
+                        substr = '##' + substr
+                    if substr in self.vocab:
+                        cur_substr = substr
+                        break
+                    end -= 1
+                if cur_substr is None:
+                    is_bad = True
+                    break
+                sub_tokens.append(cur_substr)
+                start = end
+
+            if is_bad:
+                output_tokens.append(self.unk_token)
+            else:
+                output_tokens.extend(sub_tokens)
+        return output_tokens
+
+
+def _is_whitespace(char):
+    """Checks whether `chars` is a whitespace character."""
+    # \t, \n, and \r are technically control characters but we treat them
+    # as whitespace since they are generally considered as such.
+    if char == ' ' or char == '\t' or char == '\n' or char == '\r':
+        return True
+    cat = unicodedata.category(char)
+    if cat == 'Zs':
+        return True
+    return False
+
+
+def _is_control(char):
+    """Checks whether `chars` is a control character."""
+    # These are technically control characters but we count them as whitespace
+    # characters.
+    if char == '\t' or char == '\n' or char == '\r':
+        return False
+    cat = unicodedata.category(char)
+    if cat.startswith('C'):
+        return True
+    return False
+
+
+def _is_punctuation(char):
+    """Checks whether `chars` is a punctuation character."""
+    cp = ord(char)
+    # We treat all non-letter/number ASCII as punctuation.
+    # Characters such as "^", "$", and "`" are not in the Unicode
+    # Punctuation class but we treat them as punctuation anyways, for
+    # consistency.
+    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64)
+            or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+        return True
+    cat = unicodedata.category(char)
+    if cat.startswith('P'):
+        return True
+    return False
+
+
+# Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes for OpenAI GPT."""
+
+try:
+    from functools import lru_cache
+except ImportError:
+    # Just a dummy decorator to get the checks to run on python2
+    # because honestly I don't want to support a byte-level unicode BPE
+    # tokenizer on python 2 right now.
+    def lru_cache():
+        return lambda func: func
+
+
+@lru_cache()
+def bytes_to_unicode():
+    """
+    Returns a list of utf-8 bytes and a corresponding list of unicode strings.
+    The reversible bpe codes work on unicode strings.
+    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
+    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
+    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
+    This also avoids mapping to whitespace/control characters the bpe code barfs on.
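+    (As an illustration of the mapping, not extra behavior: the space byte
+    0x20 falls outside the printable ranges kept below, so it is remapped to
+    a higher codepoint instead of remaining raw whitespace.)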
+    """
+    _chr = unichr if sys.version_info[0] == 2 else chr
+    bs = list(range(ord('!'),
+                    ord('~') + 1)) + list(range(
+                        ord('¡'),
+                        ord('¬') + 1)) + list(range(ord('®'),
+                                                    ord('ÿ') + 1))
+    cs = bs[:]
+    n = 0
+    for b in range(2**8):
+        if b not in bs:
+            bs.append(b)
+            cs.append(2**8 + n)
+            n += 1
+    cs = [_chr(n) for n in cs]
+    return dict(zip(bs, cs))
+
+
+def get_pairs(word):
+    """Return set of symbol pairs in a word.
+
+    Word is represented as tuple of symbols (symbols being variable-length strings).
+    """
+    pairs = set()
+    prev_char = word[0]
+    for char in word[1:]:
+        pairs.add((prev_char, char))
+        prev_char = char
+    return pairs
+
+
+class GPT2Tokenizer(object):
+    """
+    GPT-2 BPE tokenizer. Peculiarities:
+        - Byte-level BPE
+    """
+
+    def __init__(self,
+                 vocab_file,
+                 merges_file,
+                 errors='replace',
+                 special_tokens=None,
+                 max_len=None):
+        self.max_len = max_len if max_len is not None else int(1e12)
+        self.encoder = json.load(open(vocab_file))
+        self.decoder = {v: k for k, v in self.encoder.items()}
+        self.errors = errors  # how to handle errors in decoding
+        self.byte_encoder = bytes_to_unicode()
+        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
+        bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
+        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
+        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
+        self.cache = {}
+
+        # Should have added re.IGNORECASE so BPE merges can happen for
+        # capitalized versions of contractions
+        self.pat = re.compile(
+            r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
+        )
+
+        self.special_tokens = {}
+        self.special_tokens_decoder = {}
+        self.set_special_tokens(special_tokens)
+
+    def __len__(self):
+        return len(self.encoder) + len(self.special_tokens)
+
+    def set_special_tokens(self, special_tokens):
+        """ Add a list of additional tokens to the encoder.
+            The additional tokens are indexed starting from the last index of the
+            current vocabulary in the order of the `special_tokens` list.
+        """
+        if not special_tokens:
+            self.special_tokens = {}
+            self.special_tokens_decoder = {}
+            return
+        self.special_tokens = dict((tok, len(self.encoder) + i)
+                                   for i, tok in enumerate(special_tokens))
+        self.special_tokens_decoder = {
+            v: k
+            for k, v in self.special_tokens.items()
+        }
+        logger.info('Special tokens {}'.format(self.special_tokens))
+
+    def bpe(self, token):
+        if token in self.cache:
+            return self.cache[token]
+        word = tuple(token)
+        pairs = get_pairs(word)
+
+        if not pairs:
+            return token
+
+        while True:
+            bigram = min(
+                pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf')))
+            if bigram not in self.bpe_ranks:
+                break
+            first, second = bigram
+            new_word = []
+            i = 0
+            while i < len(word):
+                try:
+                    j = word.index(first, i)
+                    new_word.extend(word[i:j])
+                    i = j
+                except ValueError:
+                    new_word.extend(word[i:])
+                    break
+
+                if word[i] == first and i < len(word) - 1 and word[
+                        i + 1] == second:
+                    new_word.append(first + second)
+                    i += 2
+                else:
+                    new_word.append(word[i])
+                    i += 1
+            new_word = tuple(new_word)
+            word = new_word
+            if len(word) == 1:
+                break
+            else:
+                pairs = get_pairs(word)
+        word = ' '.join(word)
+        self.cache[token] = word
+        return word
+
+    def tokenize(self, text):
+        """ Tokenize a string.
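+
+            A hypothetical illustration: tokenize('Hello world') first splits
+            the text with the regex into ['Hello', ' world'], byte-encodes
+            each piece (the leading space becomes 'Ġ'), then applies BPE, so
+            the result might be ['Hello', 'Ġworld'] depending on the loaded
+            merges.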
""" + bpe_tokens = [] + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[ord(b)] for b in token + if ord(b) in self.byte_encoder) + if token == '': + continue + bpe_tokens.extend( + bpe_token for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def convert_tokens_to_ids(self, tokens): + """ Converts a sequence of tokens into ids using the vocab. """ + ids = [] + if isinstance(tokens, str) or (sys.version_info[0] == 2 + and isinstance(tokens, unicode)): + if tokens in self.special_tokens: + return self.special_tokens[tokens] + else: + return self.encoder.get(tokens, 0) + for token in tokens: + if token in self.special_tokens: + ids.append(self.special_tokens[token]) + else: + ids.append(self.encoder.get(token, 0)) + if len(ids) > self.max_len: + logger.warning( + 'Token indices sequence length is longer than the specified maximum ' + ' sequence length for this OpenAI GPT model ({} > {}). Running this' + ' sequence through the model will result in indexing errors'. + format(len(ids), self.max_len)) + return ids + + def convert_ids_to_tokens(self, ids, skip_special_tokens=False): + """Converts a sequence of ids in BPE tokens using the vocab.""" + tokens = [] + for i in ids: + if i in self.special_tokens_decoder: + if not skip_special_tokens: + tokens.append(self.special_tokens_decoder[i]) + else: + tokens.append(self.decoder[i]) + return tokens + + def encode(self, text): + return self.convert_tokens_to_ids(self.tokenize(text)) + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode( + 'utf-8', errors=self.errors) + return text diff --git a/modelscope/pydatasets/__init__.py b/modelscope/pydatasets/__init__.py new file mode 100644 index 00000000..a1ed1d93 --- /dev/null +++ b/modelscope/pydatasets/__init__.py @@ -0,0 +1 @@ +from .py_dataset import PyDataset diff --git a/modelscope/pydatasets/py_dataset.py b/modelscope/pydatasets/py_dataset.py new file mode 100644 index 00000000..7d0edadb --- /dev/null +++ b/modelscope/pydatasets/py_dataset.py @@ -0,0 +1,126 @@ +import logging +from typing import (Any, Callable, Dict, List, Mapping, Optional, Sequence, + Union) + +from datasets import Dataset, load_dataset + +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +class PyDataset: + _hf_ds = None # holds the underlying HuggingFace Dataset + """A PyDataset backed by hugging face Dataset.""" + + def __init__(self, hf_ds: Dataset): + self._hf_ds = hf_ds + self.target = None + + def __iter__(self): + if isinstance(self._hf_ds, Dataset): + for item in self._hf_ds: + if self.target is not None: + yield item[self.target] + else: + yield item + else: + for ds in self._hf_ds.values(): + for item in ds: + if self.target is not None: + yield item[self.target] + else: + yield item + + @classmethod + def from_hf_dataset(cls, + hf_ds: Dataset, + target: str = None) -> 'PyDataset': + dataset = cls(hf_ds) + dataset.target = target + return dataset + + @staticmethod + def load( + path: Union[str, list], + target: Optional[str] = None, + version: Optional[str] = None, + name: Optional[str] = None, + split: Optional[str] = None, + data_dir: Optional[str] = None, + data_files: Optional[Union[str, Sequence[str], + Mapping[str, Union[str, + Sequence[str]]]]] = None + ) -> 'PyDataset': + """Load a PyDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. + Args: + + path (str): Path or name of the dataset. 
+            target (str, optional): Name of the column to output.
+            version (str, optional): Version of the dataset script to load.
+            name (str, optional): Defining the subset_name of the dataset.
+            data_dir (str, optional): Defining the data_dir of the dataset
+                configuration.
+            data_files (str or Sequence or Mapping, optional): Path(s) to
+                source data file(s).
+            split (str, optional): Which split of the data to load.
+
+        Returns:
+            PyDataset (obj:`PyDataset`): PyDataset object for a certain dataset.
+        """
+        if isinstance(path, str):
+            dataset = load_dataset(
+                path,
+                name=name,
+                revision=version,
+                split=split,
+                data_dir=data_dir,
+                data_files=data_files)
+        elif isinstance(path, list):
+            if target is None:
+                target = 'target'
+            # build a single column holding every element of `path`,
+            # not just the last one
+            dataset = Dataset.from_dict({target: path})
+        else:
+            raise TypeError('path must be a str or a list, but got'
+                            f' {type(path)}')
+        return PyDataset.from_hf_dataset(dataset, target=target)
+
+    def to_torch_dataset(
+        self,
+        columns: Union[str, List[str]] = None,
+        output_all_columns: bool = False,
+        **format_kwargs,
+    ):
+        self._hf_ds.reset_format()
+        self._hf_ds.set_format(
+            type='torch',
+            columns=columns,
+            output_all_columns=output_all_columns,
+            format_kwargs=format_kwargs)
+        return self._hf_ds
+
+    def to_tf_dataset(
+        self,
+        columns: Union[str, List[str]],
+        batch_size: int,
+        shuffle: bool,
+        collate_fn: Callable,
+        drop_remainder: bool = None,
+        collate_fn_args: Dict[str, Any] = None,
+        label_cols: Union[str, List[str]] = None,
+        dummy_labels: bool = False,
+        prefetch: bool = True,
+    ):
+        self._hf_ds.reset_format()
+        return self._hf_ds.to_tf_dataset(
+            columns,
+            batch_size,
+            shuffle,
+            collate_fn,
+            drop_remainder=drop_remainder,
+            collate_fn_args=collate_fn_args,
+            label_cols=label_cols,
+            dummy_labels=dummy_labels,
+            prefetch=prefetch)
+
+    def to_hf_dataset(self) -> Dataset:
+        self._hf_ds.reset_format()
+        return self._hf_ds
diff --git a/maas_lib/tools/eval.py b/modelscope/tools/eval.py
similarity index 94%
rename from maas_lib/tools/eval.py
rename to modelscope/tools/eval.py
index 95bf7054..ca39932d 100644
--- a/maas_lib/tools/eval.py
+++ b/modelscope/tools/eval.py
@@ -2,7 +2,7 @@
 
 import argparse
 
-from maas_lib.trainers import build_trainer
+from modelscope.trainers import build_trainer
 
 
 def parse_args():
diff --git a/maas_lib/tools/train.py b/modelscope/tools/train.py
similarity index 92%
rename from maas_lib/tools/train.py
rename to modelscope/tools/train.py
index f7c2b54b..c6f1ef5f 100644
--- a/maas_lib/tools/train.py
+++ b/modelscope/tools/train.py
@@ -2,7 +2,7 @@
 
 import argparse
 
-from maas_lib.trainers import build_trainer
+from modelscope.trainers import build_trainer
 
 
 def parse_args():
diff --git a/maas_lib/trainers/__init__.py b/modelscope/trainers/__init__.py
similarity index 100%
rename from maas_lib/trainers/__init__.py
rename to modelscope/trainers/__init__.py
diff --git a/maas_lib/trainers/base.py b/modelscope/trainers/base.py
similarity index 96%
rename from maas_lib/trainers/base.py
rename to modelscope/trainers/base.py
index 2c11779e..372938b4 100644
--- a/maas_lib/trainers/base.py
+++ b/modelscope/trainers/base.py
@@ -3,8 +3,8 @@ from abc import ABC, abstractmethod
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
-from maas_lib.trainers.builder import TRAINERS
-from maas_lib.utils.config import Config
+from modelscope.trainers.builder import TRAINERS
+from modelscope.utils.config import Config
 
 
 class BaseTrainer(ABC):
diff --git a/maas_lib/trainers/builder.py b/modelscope/trainers/builder.py
similarity index 77%
rename from maas_lib/trainers/builder.py rename to modelscope/trainers/builder.py index 2165fe58..2192d46c 100644 --- a/maas_lib/trainers/builder.py +++ b/modelscope/trainers/builder.py @@ -1,8 +1,8 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from maas_lib.utils.config import ConfigDict -from maas_lib.utils.constant import Tasks -from maas_lib.utils.registry import Registry, build_from_cfg +from modelscope.utils.config import ConfigDict +from modelscope.utils.constant import Tasks +from modelscope.utils.registry import Registry, build_from_cfg TRAINERS = Registry('trainers') diff --git a/maas_lib/trainers/nlp/__init__.py b/modelscope/trainers/nlp/__init__.py similarity index 100% rename from maas_lib/trainers/nlp/__init__.py rename to modelscope/trainers/nlp/__init__.py diff --git a/maas_lib/trainers/nlp/sequence_classification_trainer.py b/modelscope/trainers/nlp/sequence_classification_trainer.py similarity index 98% rename from maas_lib/trainers/nlp/sequence_classification_trainer.py rename to modelscope/trainers/nlp/sequence_classification_trainer.py index f2264c0d..b2b759fa 100644 --- a/maas_lib/trainers/nlp/sequence_classification_trainer.py +++ b/modelscope/trainers/nlp/sequence_classification_trainer.py @@ -3,8 +3,8 @@ from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np -from maas_lib.utils.constant import Tasks -from maas_lib.utils.logger import get_logger +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger from ..base import BaseTrainer from ..builder import TRAINERS diff --git a/modelscope/trainers/nlp/space/__init__.py b/modelscope/trainers/nlp/space/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/trainers/nlp/space/metrics/__init__.py b/modelscope/trainers/nlp/space/metrics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/maas_lib/trainers/nlp/space/metrics/metrics_tracker.py b/modelscope/trainers/nlp/space/metrics/metrics_tracker.py similarity index 100% rename from maas_lib/trainers/nlp/space/metrics/metrics_tracker.py rename to modelscope/trainers/nlp/space/metrics/metrics_tracker.py diff --git a/modelscope/trainers/nlp/space/trainers/__init__.py b/modelscope/trainers/nlp/space/trainers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/maas_lib/trainers/nlp/space/trainers/gen_trainer.py b/modelscope/trainers/nlp/space/trainers/gen_trainer.py similarity index 100% rename from maas_lib/trainers/nlp/space/trainers/gen_trainer.py rename to modelscope/trainers/nlp/space/trainers/gen_trainer.py diff --git a/maas_lib/trainers/nlp/space/trainers/intent_trainer.py b/modelscope/trainers/nlp/space/trainers/intent_trainer.py similarity index 99% rename from maas_lib/trainers/nlp/space/trainers/intent_trainer.py rename to modelscope/trainers/nlp/space/trainers/intent_trainer.py index 9db24e6d..9a4bb799 100644 --- a/maas_lib/trainers/nlp/space/trainers/intent_trainer.py +++ b/modelscope/trainers/nlp/space/trainers/intent_trainer.py @@ -14,8 +14,9 @@ import torch from tqdm import tqdm from transformers.optimization import AdamW, get_linear_schedule_with_warmup -from maas_lib.trainers.nlp.space.metrics.metrics_tracker import MetricsTracker -from maas_lib.utils.nlp.space.args import str2bool +from modelscope.trainers.nlp.space.metrics.metrics_tracker import \ + MetricsTracker +from modelscope.utils.nlp.space.args import str2bool def get_logger(log_path, name='default'): diff --git a/modelscope/utils/__init__.py 
b/modelscope/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/maas_lib/utils/config.py b/modelscope/utils/config.py similarity index 98% rename from maas_lib/utils/config.py rename to modelscope/utils/config.py index 7d67d248..d0f3f657 100644 --- a/maas_lib/utils/config.py +++ b/modelscope/utils/config.py @@ -17,9 +17,9 @@ from typing import Dict import addict from yapf.yapflib.yapf_api import FormatCode -from maas_lib.utils.logger import get_logger -from maas_lib.utils.pymod import (import_modules, import_modules_from_file, - validate_py_syntax) +from modelscope.utils.logger import get_logger +from modelscope.utils.pymod import (import_modules, import_modules_from_file, + validate_py_syntax) if platform.system() == 'Windows': import regex as re # type: ignore @@ -117,7 +117,7 @@ class Config: # delete imported module del sys.modules[module_nanme] elif filename.endswith(('.yml', '.yaml', '.json')): - from maas_lib.fileio import load + from modelscope.fileio import load cfg_dict = load(tmp_cfg_file.name) # close temp file tmp_cfg_file.close() @@ -364,7 +364,7 @@ class Config: file (str, optional): Path of the output file where the config will be dumped. Defaults to None. """ - from maas_lib.fileio import dump + from modelscope.fileio import dump cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() if file is None: if self.filename is None or self.filename.endswith('.py'): diff --git a/maas_lib/utils/constant.py b/modelscope/utils/constant.py similarity index 97% rename from maas_lib/utils/constant.py rename to modelscope/utils/constant.py index 17e76309..41c9443b 100644 --- a/maas_lib/utils/constant.py +++ b/modelscope/utils/constant.py @@ -13,7 +13,7 @@ class Fields(object): class Tasks(object): - """ Names for tasks supported by maas lib. + """ Names for tasks supported by modelscope. Holds the standard task name to use for identifying different tasks. This should be used to register models, pipelines, trainers. diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py new file mode 100644 index 00000000..2f61b148 --- /dev/null +++ b/modelscope/utils/hub.py @@ -0,0 +1,14 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os + +from maas_hub.constants import MODEL_ID_SEPARATOR + + +# temp solution before the hub-cache is in place +def get_model_cache_dir(model_id: str, branch: str = 'master'): + model_id_expanded = model_id.replace('/', + MODEL_ID_SEPARATOR) + '.' 
+ branch
+    default_cache_dir = os.path.expanduser(os.path.join('~/.cache', 'maas'))
+    return os.getenv('MAAS_CACHE',
+                     os.path.join(default_cache_dir, 'hub', model_id_expanded))
diff --git a/maas_lib/utils/logger.py b/modelscope/utils/logger.py
similarity index 100%
rename from maas_lib/utils/logger.py
rename to modelscope/utils/logger.py
diff --git a/modelscope/utils/nlp/__init__.py b/modelscope/utils/nlp/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modelscope/utils/nlp/space/__init__.py b/modelscope/utils/nlp/space/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/maas_lib/utils/nlp/space/args.py b/modelscope/utils/nlp/space/args.py
similarity index 100%
rename from maas_lib/utils/nlp/space/args.py
rename to modelscope/utils/nlp/space/args.py
diff --git a/maas_lib/utils/nlp/space/criterions.py b/modelscope/utils/nlp/space/criterions.py
similarity index 100%
rename from maas_lib/utils/nlp/space/criterions.py
rename to modelscope/utils/nlp/space/criterions.py
diff --git a/maas_lib/utils/nlp/space/db_ops.py b/modelscope/utils/nlp/space/db_ops.py
similarity index 100%
rename from maas_lib/utils/nlp/space/db_ops.py
rename to modelscope/utils/nlp/space/db_ops.py
diff --git a/maas_lib/utils/nlp/space/ontology.py b/modelscope/utils/nlp/space/ontology.py
similarity index 100%
rename from maas_lib/utils/nlp/space/ontology.py
rename to modelscope/utils/nlp/space/ontology.py
diff --git a/maas_lib/utils/nlp/space/scores.py b/modelscope/utils/nlp/space/scores.py
similarity index 100%
rename from maas_lib/utils/nlp/space/scores.py
rename to modelscope/utils/nlp/space/scores.py
diff --git a/maas_lib/utils/nlp/space/utils.py b/modelscope/utils/nlp/space/utils.py
similarity index 100%
rename from maas_lib/utils/nlp/space/utils.py
rename to modelscope/utils/nlp/space/utils.py
diff --git a/maas_lib/utils/pymod.py b/modelscope/utils/pymod.py
similarity index 98%
rename from maas_lib/utils/pymod.py
rename to modelscope/utils/pymod.py
index 4f717480..6db6798d 100644
--- a/maas_lib/utils/pymod.py
+++ b/modelscope/utils/pymod.py
@@ -7,7 +7,7 @@ import sys
 import types
 from importlib import import_module
 
-from maas_lib.utils.logger import get_logger
+from modelscope.utils.logger import get_logger
 
 logger = get_logger()
 
diff --git a/maas_lib/utils/registry.py b/modelscope/utils/registry.py
similarity index 94%
rename from maas_lib/utils/registry.py
rename to modelscope/utils/registry.py
index 838e6f83..73a938ea 100644
--- a/maas_lib/utils/registry.py
+++ b/modelscope/utils/registry.py
@@ -3,7 +3,7 @@ import inspect
 from email.policy import default
 
-from maas_lib.utils.logger import get_logger
+from modelscope.utils.logger import get_logger
 
 default_group = 'default'
 logger = get_logger()
@@ -100,6 +100,12 @@ class Registry(object):
         >>> class SwinTransformerDefaultGroup:
         >>>     pass
 
+        >>> class SwinTransformer2:
+        >>>     pass
+        >>> MODELS.register_module('image-classification',
+                                   module_name='SwinT2',
+                                   module_cls=SwinTransformer2)
+
         Args:
             group_key: Group name of which module will be registered,
                 default group name is 'default'
@@ -168,7 +174,7 @@ def build_from_cfg(cfg,
             '`cfg` or `default_args` must contain the key "type", '
             f'but got {cfg}\n{default_args}')
     if not isinstance(registry, Registry):
-        raise TypeError('registry must be an maas_lib.Registry object, '
+        raise TypeError('registry must be a modelscope.Registry object, '
                         f'but got {type(registry)}')
     if not (isinstance(default_args, dict) or default_args is None):
         raise TypeError('default_args must be a dict or None, 
' diff --git a/maas_lib/utils/type_assert.py b/modelscope/utils/type_assert.py similarity index 100% rename from maas_lib/utils/type_assert.py rename to modelscope/utils/type_assert.py diff --git a/maas_lib/version.py b/modelscope/version.py similarity index 100% rename from maas_lib/version.py rename to modelscope/version.py diff --git a/requirements.txt b/requirements.txt index 999c567e..39eb5e23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ -r requirements/runtime.txt -r requirements/pipeline.txt +-r requirements/multi-modal.txt +-r requirements/nlp.txt +-r requirements/cv.txt diff --git a/requirements/cv.txt b/requirements/cv.txt new file mode 100644 index 00000000..66799b76 --- /dev/null +++ b/requirements/cv.txt @@ -0,0 +1 @@ +easydict diff --git a/requirements/maas.txt b/requirements/maas.txt deleted file mode 100644 index 66b9aeca..00000000 --- a/requirements/maas.txt +++ /dev/null @@ -1,3 +0,0 @@ -http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/maas_lib-0.1.1-py3-none-any.whl -https://maashub.oss-cn-hangzhou.aliyuncs.com/releases/maas_hub-0.1.0.dev0-py2.py3-none-any.whl -https://mit-dataset.oss-cn-beijing.aliyuncs.com/release/ali_maas_datasets-0.0.1.dev0-py3-none-any.whl diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt new file mode 100644 index 00000000..ad641b63 --- /dev/null +++ b/requirements/multi-modal.txt @@ -0,0 +1,9 @@ +datasets +einops +ftfy>=6.0.3 +https://jirenmr.oss-cn-zhangjiakou.aliyuncs.com/ofa/fairseq-maas-py3-none-any.whl +https://jirenmr.oss-cn-zhangjiakou.aliyuncs.com/ofa/ofa-0.0.2-py3-none-any.whl +pycocoevalcap>=1.2 +pycocotools>=2.0.4 +rouge_score +timm diff --git a/requirements/nlp.txt b/requirements/nlp.txt new file mode 100644 index 00000000..8de83798 --- /dev/null +++ b/requirements/nlp.txt @@ -0,0 +1 @@ +https://alinlp.alibaba-inc.com/pypi/sofa-1.0.1.3-py3-none-any.whl diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 5d24e660..47a11cbc 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,6 +1,7 @@ addict +datasets +easydict https://maashub.oss-cn-hangzhou.aliyuncs.com/releases/maas_hub-0.1.0.dev0-py2.py3-none-any.whl -https://mit-dataset.oss-cn-beijing.aliyuncs.com/release/ali_maas_datasets-0.0.1.dev0-py3-none-any.whl numpy opencv-python-headless Pillow diff --git a/setup.cfg b/setup.cfg index 8feaa182..0b929b04 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools -known_first_party = maas_lib +known_first_party = modelscope known_third_party = json,yaml no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/setup.py b/setup.py index b9044bff..b027c4cb 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def readme(): return content -version_file = 'maas_lib/version.py' +version_file = 'modelscope/version.py' def get_git_hash(): @@ -155,8 +155,8 @@ def pack_resource(): shutil.rmtree(root_dir) os.makedirs(root_dir) - proj_dir = root_dir + 'maas_lib/' - shutil.copytree('./maas_lib', proj_dir) + proj_dir = root_dir + 'modelscope/' + shutil.copytree('./modelscope', proj_dir) shutil.copytree('./configs', proj_dir + 'configs') shutil.copytree('./requirements', 'package/requirements') shutil.copy('./requirements.txt', 'package/requirements.txt') @@ -170,13 +170,13 @@ if __name__ == '__main__': os.chdir('package') install_requires, deps_link = parse_requirements('requirements.txt') setup( - name='maas-lib', + name='model-scope', 
version=get_version(), description='', long_description=readme(), long_description_content_type='text/markdown', - author='Alibaba MaaS team', - author_email='maas_lib@list.alibaba-inc.com', + author='Alibaba ModelScope team', + author_email='modelscope@list.alibaba-inc.com', keywords='', url='TBD', packages=find_packages(exclude=('configs', 'tools', 'demo')), diff --git a/tests/fileio/test_file.py b/tests/fileio/test_file.py index 9f83f02c..0be41b42 100644 --- a/tests/fileio/test_file.py +++ b/tests/fileio/test_file.py @@ -5,7 +5,7 @@ import unittest from requests import HTTPError -from maas_lib.fileio.file import File, HTTPStorage, LocalStorage +from modelscope.fileio.file import File, HTTPStorage, LocalStorage class FileTest(unittest.TestCase): diff --git a/tests/fileio/test_io.py b/tests/fileio/test_io.py index 1e202e5b..0a80d3f7 100644 --- a/tests/fileio/test_io.py +++ b/tests/fileio/test_io.py @@ -2,7 +2,7 @@ import tempfile import unittest -from maas_lib.fileio.io import dump, dumps, load +from modelscope.fileio.io import dump, dumps, load class FileIOTest(unittest.TestCase): diff --git a/tests/pipelines/nlp/test_dialog_generation.py b/tests/pipelines/nlp/test_dialog_generation.py index 413e70b5..8ec8e17a 100644 --- a/tests/pipelines/nlp/test_dialog_generation.py +++ b/tests/pipelines/nlp/test_dialog_generation.py @@ -6,9 +6,9 @@ import unittest from tests.case.nlp.dialog_generation_case import test_case -from maas_lib.models.nlp import DialogGenerationModel -from maas_lib.pipelines import DialogGenerationPipeline, pipeline -from maas_lib.preprocessors import DialogGenerationPreprocessor +from modelscope.models.nlp import DialogGenerationModel +from modelscope.pipelines import DialogGenerationPipeline, pipeline +from modelscope.preprocessors import DialogGenerationPreprocessor def merge(info, result): diff --git a/tests/pipelines/nlp/test_dialog_intent.py b/tests/pipelines/nlp/test_dialog_intent.py index 86e78d06..11665762 100644 --- a/tests/pipelines/nlp/test_dialog_intent.py +++ b/tests/pipelines/nlp/test_dialog_intent.py @@ -6,10 +6,10 @@ import unittest from tests.case.nlp.dialog_intent_case import test_case -from maas_lib.models.nlp import DialogIntentModel -from maas_lib.pipelines import DialogIntentPipeline, pipeline -from maas_lib.preprocessors import DialogIntentPreprocessor -from maas_lib.utils.constant import Tasks +from modelscope.models.nlp import DialogIntentModel +from modelscope.pipelines import DialogIntentPipeline, pipeline +from modelscope.preprocessors import DialogIntentPreprocessor +from modelscope.utils.constant import Tasks class DialogGenerationTest(unittest.TestCase): @@ -28,7 +28,7 @@ class DialogGenerationTest(unittest.TestCase): # pipeline1 = pipeline(task=Tasks.dialog_intent, model=model, preprocessor=preprocessor) for item in test_case: - pipeline1(item) + print(pipeline1(item)) if __name__ == '__main__': diff --git a/tests/pipelines/test_base.py b/tests/pipelines/test_base.py index d523e7c4..14f646a9 100644 --- a/tests/pipelines/test_base.py +++ b/tests/pipelines/test_base.py @@ -6,11 +6,11 @@ from typing import Any, Dict, List, Tuple, Union import numpy as np import PIL -from maas_lib.pipelines import Pipeline, pipeline -from maas_lib.pipelines.builder import PIPELINES -from maas_lib.utils.constant import Tasks -from maas_lib.utils.logger import get_logger -from maas_lib.utils.registry import default_group +from modelscope.pipelines import Pipeline, pipeline +from modelscope.pipelines.builder import PIPELINES, add_default_pipeline_info +from 
modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import default_group logger = get_logger() @@ -53,7 +53,7 @@ class CustomPipelineTest(unittest.TestCase): """ if not isinstance(input, PIL.Image.Image): - from maas_lib.preprocessors import load_image + from modelscope.preprocessors import load_image data_dict = {'img': load_image(input), 'url': input} else: data_dict = {'img': input} @@ -75,6 +75,7 @@ class CustomPipelineTest(unittest.TestCase): return inputs self.assertTrue('custom-image' in PIPELINES.modules[default_group]) + add_default_pipeline_info(Tasks.image_tagging, 'custom-image') pipe = pipeline(pipeline_name='custom-image') pipe2 = pipeline(Tasks.image_tagging) self.assertTrue(type(pipe) is type(pipe2)) diff --git a/tests/pipelines/test_builder.py b/tests/pipelines/test_builder.py new file mode 100644 index 00000000..a0b15a32 --- /dev/null +++ b/tests/pipelines/test_builder.py @@ -0,0 +1,68 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest +from asyncio import Task +from typing import Any, Dict, List, Tuple, Union + +import numpy as np +import PIL + +from modelscope.models.base import Model +from modelscope.pipelines import Pipeline, pipeline +from modelscope.pipelines.builder import PIPELINES, add_default_pipeline_info +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.registry import default_group + +logger = get_logger() + + +@PIPELINES.register_module( + group_key=Tasks.image_tagging, module_name='custom_single_model') +class CustomSingleModelPipeline(Pipeline): + + def __init__(self, + config_file: str = None, + model: List[Union[str, Model]] = None, + preprocessor=None, + **kwargs): + super().__init__(config_file, model, preprocessor, **kwargs) + assert isinstance(model, str), 'model is not str' + print(model) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return super().postprocess(inputs) + + +@PIPELINES.register_module( + group_key=Tasks.image_tagging, module_name='model1_model2') +class CustomMultiModelPipeline(Pipeline): + + def __init__(self, + config_file: str = None, + model: List[Union[str, Model]] = None, + preprocessor=None, + **kwargs): + super().__init__(config_file, model, preprocessor, **kwargs) + assert isinstance(model, list), 'model is not list' + for m in model: + assert isinstance(m, str), 'submodel is not str' + print(m) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return super().postprocess(inputs) + + +class PipelineInterfaceTest(unittest.TestCase): + + def test_single_model(self): + pipe = pipeline(Tasks.image_tagging, model='custom_single_model') + assert isinstance(pipe, CustomSingleModelPipeline) + + def test_multi_model(self): + pipe = pipeline(Tasks.image_tagging, model=['model1', 'model2']) + assert isinstance(pipe, CustomMultiModelPipeline) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_image_captioning.py b/tests/pipelines/test_image_captioning.py new file mode 100644 index 00000000..5584d0e2 --- /dev/null +++ b/tests/pipelines/test_image_captioning.py @@ -0,0 +1,36 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
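+# A sketch of the flow exercised below, assuming the OSS URLs for the OFA
+# checkpoint and the BPE vocabulary stay reachable: download both, build an
+# image-captioning pipeline from the local checkpoint, then caption an image
+# given by URL.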
+
+import os
+import tempfile
+import unittest
+
+from modelscope.fileio import File
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+
+class ImageCaptionTest(unittest.TestCase):
+
+    @unittest.skip('skip long test')
+    def test_run(self):
+        model = 'https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt'
+
+        os.system(
+            'wget https://jirenmr.oss-cn-zhangjiakou.aliyuncs.com/ofa/BPE.zip'
+        )
+        os.system('unzip BPE.zip')
+        bpe_dir = './BPE'
+
+        with tempfile.NamedTemporaryFile('wb', suffix='.pb') as ofile:
+            ofile.write(File.read(model))
+            img_captioning = pipeline(
+                Tasks.image_captioning, model=ofile.name, bpe_dir=bpe_dir)
+
+            result = img_captioning(
+                'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
+            )
+            print(result['caption'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index 26847389..53006317 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -1,19 +1,28 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-
 import os.path as osp
+import shutil
 import tempfile
 import unittest
 
 import cv2
 
-from ali_maas_datasets import PyDataset
-
-from maas_lib.fileio import File
-from maas_lib.pipelines import pipeline
-from maas_lib.utils.constant import Tasks
+from modelscope.fileio import File
+from modelscope.pipelines import pipeline
+from modelscope.pydatasets import PyDataset
+from modelscope.utils.constant import Tasks
+from modelscope.utils.hub import get_model_cache_dir
 
 
 class ImageMattingTest(unittest.TestCase):
 
+    def setUp(self) -> None:
+        self.model_id = 'damo/image-matting-person'
+        # switch to False if downloading every time is not desired
+        purge_cache = True
+        if purge_cache:
+            shutil.rmtree(
+                get_model_cache_dir(self.model_id), ignore_errors=True)
+
     def test_run(self):
         model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \
                      '.com/data/test/maas/image_matting/matting_person.pb'
@@ -36,16 +45,23 @@ class ImageMattingTest(unittest.TestCase):
         # input_location = '/dir/to/images'
 
         dataset = PyDataset.load(input_location, target='image')
-        img_matting = pipeline(
-            Tasks.image_matting, model='damo/image-matting-person')
+        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         # note that for dataset output, the inference-output is a Generator that can be iterated.
         result = img_matting(dataset)
         cv2.imwrite('result.png', next(result)['output_png'])
         print(f'Output written to {osp.abspath("result.png")}')
 
     def test_run_modelhub(self):
-        img_matting = pipeline(
-            Tasks.image_matting, model='damo/image-matting-person')
+        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+
+        result = img_matting(
+            'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
+        )
+        cv2.imwrite('result.png', result['output_png'])
+        print(f'Output written to {osp.abspath("result.png")}')
+
+    def test_run_modelhub_default_model(self):
+        img_matting = pipeline(Tasks.image_matting)
 
         result = img_matting(
             'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py
new file mode 100644
index 00000000..6f352e42
--- /dev/null
+++ b/tests/pipelines/test_person_image_cartoon.py
@@ -0,0 +1,49 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
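+# A sketch of what these tests exercise, assuming the model id below exists
+# on the hub: build an image-generation pipeline (from a local model dir or
+# from the hub), run it on a portrait URL, and write the cartoonized PNG to
+# disk.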
diff --git a/tests/pipelines/test_person_image_cartoon.py b/tests/pipelines/test_person_image_cartoon.py
new file mode 100644
index 00000000..6f352e42
--- /dev/null
+++ b/tests/pipelines/test_person_image_cartoon.py
@@ -0,0 +1,49 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import os.path as osp
+import unittest
+
+import cv2
+
+from modelscope.pipelines import pipeline
+from modelscope.pipelines.base import Pipeline
+from modelscope.utils.constant import Tasks
+
+
+class ImageCartoonTest(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.model_id = 'damo/cv_unet_person-image-cartoon_compound-models'
+        self.test_image = \
+            'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com' \
+            '/data/test/maas/image_carton/test.png'
+
+    def pipeline_inference(self, pipeline: Pipeline, input_location: str):
+        result = pipeline(input_location)
+        if result is not None:
+            cv2.imwrite('result.png', result['output_png'])
+            print(f'Output written to {osp.abspath("result.png")}')
+
+    @unittest.skip('deprecated, download model from model hub instead')
+    def test_run_by_direct_model_download(self):
+        model_dir = './assets'
+        if not os.path.exists(model_dir):
+            os.system(
+                'wget https://invi-label.oss-cn-shanghai.aliyuncs.com/label/model/cartoon/assets.zip'
+            )
+            os.system('unzip assets.zip')
+
+        img_cartoon = pipeline(Tasks.image_generation, model=model_dir)
+        self.pipeline_inference(img_cartoon, self.test_image)
+
+    def test_run_modelhub(self):
+        img_cartoon = pipeline(Tasks.image_generation, model=self.model_id)
+        self.pipeline_inference(img_cartoon, self.test_image)
+
+    def test_run_modelhub_default_model(self):
+        img_cartoon = pipeline(Tasks.image_generation)
+        self.pipeline_inference(img_cartoon, self.test_image)
+
+
+if __name__ == '__main__':
+    unittest.main()
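test_person_image_cartoon.py above covers the three ways a pipeline can resolve its model. A condensed sketch, using the task and model id from the test (the local directory path is illustrative):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# 1. explicit local model directory
img_cartoon = pipeline(Tasks.image_generation, model='./assets')
# 2. model id resolved against the model hub
img_cartoon = pipeline(
    Tasks.image_generation,
    model='damo/cv_unet_person-image-cartoon_compound-models')
# 3. default model registered for the task
img_cartoon = pipeline(Tasks.image_generation)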
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index 45b584af..3e3faa1d 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -1,21 +1,29 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-import tempfile
+import shutil
 import unittest
 import zipfile
 from pathlib import Path
 
-from ali_maas_datasets import PyDataset
-
-from maas_lib.fileio import File
-from maas_lib.models import Model
-from maas_lib.models.nlp import SequenceClassificationModel
-from maas_lib.pipelines import SequenceClassificationPipeline, pipeline
-from maas_lib.preprocessors import SequenceClassificationPreprocessor
-from maas_lib.utils.constant import Tasks
+from modelscope.fileio import File
+from modelscope.models import Model
+from modelscope.models.nlp import BertForSequenceClassification
+from modelscope.pipelines import SequenceClassificationPipeline, pipeline
+from modelscope.preprocessors import SequenceClassificationPreprocessor
+from modelscope.pydatasets import PyDataset
+from modelscope.utils.constant import Tasks
+from modelscope.utils.hub import get_model_cache_dir
 
 
 class SequenceClassificationTest(unittest.TestCase):
 
+    def setUp(self) -> None:
+        self.model_id = 'damo/bert-base-sst2'
+        # switch to False if downloading every time is not desired
+        purge_cache = True
+        if purge_cache:
+            shutil.rmtree(
+                get_model_cache_dir(self.model_id), ignore_errors=True)
+
     def predict(self, pipeline_ins: SequenceClassificationPipeline):
         from easynlp.appzoo import load_dataset
 
@@ -29,6 +37,12 @@ class SequenceClassificationTest(unittest.TestCase):
 
         print(data)
 
+    def printDataset(self, dataset: PyDataset):
+        for i, r in enumerate(dataset):
+            if i > 10:
+                break
+            print(r)
+
     def test_run(self):
         model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
             '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
@@ -44,7 +58,7 @@ class SequenceClassificationTest(unittest.TestCase):
         with zipfile.ZipFile(cache_path_str, 'r') as zipf:
             zipf.extractall(cache_path.parent)
         path = r'.cache/easynlp/'
-        model = SequenceClassificationModel(path)
+        model = BertForSequenceClassification(path)
         preprocessor = SequenceClassificationPreprocessor(
             path, first_sequence='sentence', second_sequence=None)
         pipeline1 = SequenceClassificationPipeline(model, preprocessor)
@@ -53,8 +67,8 @@ class SequenceClassificationTest(unittest.TestCase):
             Tasks.text_classification, model=model, preprocessor=preprocessor)
         print(pipeline2('Hello world!'))
 
-    def test_run_modelhub(self):
-        model = Model.from_pretrained('damo/bert-base-sst2')
+    def test_run_with_model_from_modelhub(self):
+        model = Model.from_pretrained(self.model_id)
         preprocessor = SequenceClassificationPreprocessor(
             model.model_dir, first_sequence='sentence', second_sequence=None)
         pipeline_ins = pipeline(
@@ -63,8 +77,21 @@ class SequenceClassificationTest(unittest.TestCase):
             preprocessor=preprocessor)
         self.predict(pipeline_ins)
 
+    def test_run_with_model_name(self):
+        text_classification = pipeline(
+            task=Tasks.text_classification, model=self.model_id)
+        result = text_classification(
+            PyDataset.load('glue', name='sst2', target='sentence'))
+        self.printDataset(result)
+
+    def test_run_with_default_model(self):
+        text_classification = pipeline(task=Tasks.text_classification)
+        result = text_classification(
+            PyDataset.load('glue', name='sst2', target='sentence'))
+        self.printDataset(result)
+
     def test_run_with_dataset(self):
-        model = Model.from_pretrained('damo/bert-base-sst2')
+        model = Model.from_pretrained(self.model_id)
         preprocessor = SequenceClassificationPreprocessor(
             model.model_dir, first_sequence='sentence', second_sequence=None)
         text_classification = pipeline(
@@ -74,10 +101,7 @@ class SequenceClassificationTest(unittest.TestCase):
         # TODO: rename parameter as dataset_name and subset_name
         dataset = PyDataset.load('glue', name='sst2', target='sentence')
         result = text_classification(dataset)
-        for i, r in enumerate(result):
-            if i > 10:
-                break
-            print(r)
+        self.printDataset(result)
 
 
 if __name__ == '__main__':
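The new test_run_with_model_name and test_run_with_default_model cases feed a PyDataset straight into the pipeline; results come back as an iterable that is consumed lazily. A sketch of that dataset path, using only APIs that appear in this patch:

from modelscope.pipelines import pipeline
from modelscope.pydatasets import PyDataset
from modelscope.utils.constant import Tasks

dataset = PyDataset.load('glue', name='sst2', target='sentence')
text_classification = pipeline(
    task=Tasks.text_classification, model='damo/bert-base-sst2')
for i, result in enumerate(text_classification(dataset)):
    if i > 10:  # sample only the first few predictions
        break
    print(result)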
diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py
new file mode 100644
index 00000000..d8f1b495
--- /dev/null
+++ b/tests/pipelines/test_text_generation.py
@@ -0,0 +1,51 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from maas_hub.snapshot_download import snapshot_download
+
+from modelscope.models import Model
+from modelscope.models.nlp import PalmForTextGenerationModel
+from modelscope.pipelines import TextGenerationPipeline, pipeline
+from modelscope.preprocessors import TextGenerationPreprocessor
+from modelscope.utils.constant import Tasks
+
+
+class TextGenerationTest(unittest.TestCase):
+    model_id = 'damo/nlp_palm_text-generation_chinese'
+    input1 = "今日天气类型='晴'&温度变化趋势='大幅上升'&最低气温='28℃'&最高气温='31℃'&体感='湿热'"
+    input2 = "今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'"
+
+    @unittest.skip('skip temporarily to save test time')
+    def test_run(self):
+        cache_path = snapshot_download(self.model_id)
+        preprocessor = TextGenerationPreprocessor(
+            cache_path, first_sequence='sentence', second_sequence=None)
+        model = PalmForTextGenerationModel(
+            cache_path, tokenizer=preprocessor.tokenizer)
+        pipeline1 = TextGenerationPipeline(model, preprocessor)
+        pipeline2 = pipeline(
+            Tasks.text_generation, model=model, preprocessor=preprocessor)
+        print(f'input: {self.input1}\npipeline1: {pipeline1(self.input1)}')
+        print()
+        print(f'input: {self.input2}\npipeline2: {pipeline2(self.input2)}')
+
+    def test_run_with_model_from_modelhub(self):
+        model = Model.from_pretrained(self.model_id)
+        preprocessor = TextGenerationPreprocessor(
+            model.model_dir, first_sequence='sentence', second_sequence=None)
+        pipeline_ins = pipeline(
+            task=Tasks.text_generation, model=model, preprocessor=preprocessor)
+        print(pipeline_ins(self.input1))
+
+    def test_run_with_model_name(self):
+        pipeline_ins = pipeline(
+            task=Tasks.text_generation, model=self.model_id)
+        print(pipeline_ins(self.input2))
+
+    def test_run_with_default_model(self):
+        pipeline_ins = pipeline(task=Tasks.text_generation)
+        print(pipeline_ins(self.input2))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/preprocessors/test_common.py b/tests/preprocessors/test_common.py
index d9b0f74f..1ee13589 100644
--- a/tests/preprocessors/test_common.py
+++ b/tests/preprocessors/test_common.py
@@ -2,7 +2,7 @@
 
 import unittest
 
-from maas_lib.preprocessors import PREPROCESSORS, Compose, Preprocessor
+from modelscope.preprocessors import PREPROCESSORS, Compose, Preprocessor
 
 
 class ComposeTest(unittest.TestCase):
diff --git a/tests/preprocessors/test_nlp.py b/tests/preprocessors/test_nlp.py
index 740bf938..fca01597 100644
--- a/tests/preprocessors/test_nlp.py
+++ b/tests/preprocessors/test_nlp.py
@@ -2,9 +2,9 @@
 
 import unittest
 
-from maas_lib.preprocessors import build_preprocessor
-from maas_lib.utils.constant import Fields, InputFields
-from maas_lib.utils.logger import get_logger
+from modelscope.preprocessors import build_preprocessor
+from modelscope.utils.constant import Fields, InputFields
+from modelscope.utils.logger import get_logger
 
 logger = get_logger()
 
diff --git a/tests/pydatasets/__init__.py b/tests/pydatasets/__init__.py
new file mode 100644
index 00000000..e69de29b
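test_text_generation.py above drives the same model through three entry points: snapshot_download plus explicitly built components, Model.from_pretrained, and a bare model id. The shortest path, sketched with the model id and an input string taken from that test:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

pipeline_ins = pipeline(
    task=Tasks.text_generation, model='damo/nlp_palm_text-generation_chinese')
print(pipeline_ins("今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'"))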
diff --git a/tests/pydatasets/test_py_dataset.py b/tests/pydatasets/test_py_dataset.py
new file mode 100644
index 00000000..7accd814
--- /dev/null
+++ b/tests/pydatasets/test_py_dataset.py
@@ -0,0 +1,44 @@
+import unittest
+
+import datasets as hfdata
+
+from modelscope.pydatasets import PyDataset
+
+
+class PyDatasetTest(unittest.TestCase):
+
+    def setUp(self):
+        # ds1 initialized from in-memory json
+        self.json_data = {
+            'dummy': [{
+                'a': i,
+                'x': i * 10,
+                'c': i * 100
+            } for i in range(1, 11)]
+        }
+        hfds1 = hfdata.Dataset.from_dict(self.json_data)
+        self.ds1 = PyDataset.from_hf_dataset(hfds1)
+
+        # ds2 initialized from the Hugging Face hub
+        hfds2 = hfdata.load_dataset(
+            'glue', 'mrpc', revision='2.0.0', split='train')
+        self.ds2 = PyDataset.from_hf_dataset(hfds2)
+
+    def tearDown(self):
+        pass
+
+    def test_to_hf_dataset(self):
+        hfds = self.ds1.to_hf_dataset()
+        hfds1 = hfdata.Dataset.from_dict(self.json_data)
+        self.assertEqual(hfds.data, hfds1.data)
+
+        # simple map function
+        hfds = hfds.map(lambda e: {'new_feature': e['dummy']['a']})
+        self.assertEqual(len(hfds['new_feature']), 10)
+
+        hfds2 = self.ds2.to_hf_dataset()
+        self.assertTrue(hfds2[0]['sentence1'].startswith('Amrozi'))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/trainers/test_sequence_classification_trainer.py b/tests/trainers/test_sequence_classification_trainer.py
index 9846db4f..c0b2d109 100644
--- a/tests/trainers/test_sequence_classification_trainer.py
+++ b/tests/trainers/test_sequence_classification_trainer.py
@@ -2,9 +2,9 @@
 import unittest
 import zipfile
 from pathlib import Path
 
-from maas_lib.fileio import File
-from maas_lib.trainers import build_trainer
-from maas_lib.utils.logger import get_logger
+from modelscope.fileio import File
+from modelscope.trainers import build_trainer
+from modelscope.utils.logger import get_logger
 
 logger = get_logger()
diff --git a/tests/trainers/test_trainer_base.py b/tests/trainers/test_trainer_base.py
index e764d6c9..c5fc1303 100644
--- a/tests/trainers/test_trainer_base.py
+++ b/tests/trainers/test_trainer_base.py
@@ -2,7 +2,7 @@
 
 import unittest
 
-from maas_lib.trainers import build_trainer
+from modelscope.trainers import build_trainer
 
 
 class DummyTrainerTest(unittest.TestCase):
diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py
index 31d51311..48f1d4a8 100644
--- a/tests/utils/test_config.py
+++ b/tests/utils/test_config.py
@@ -5,8 +5,8 @@
 import tempfile
 import unittest
 from pathlib import Path
 
-from maas_lib.fileio import dump, load
-from maas_lib.utils.config import Config
+from modelscope.fileio import dump, load
+from modelscope.utils.config import Config
 
 obj = {'a': 1, 'b': {'c': [1, 2, 3], 'd': 'dd'}}
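test_py_dataset.py above round-trips between PyDataset and Hugging Face datasets. The core conversion, sketched with the same in-memory data as the test:

import datasets as hfdata

from modelscope.pydatasets import PyDataset

hfds = hfdata.Dataset.from_dict(
    {'dummy': [{'a': i, 'x': i * 10} for i in range(1, 11)]})
ds = PyDataset.from_hf_dataset(hfds)         # wrap a Hugging Face dataset
assert ds.to_hf_dataset().data == hfds.data  # and unwrap it losslessly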
diff --git a/tests/utils/test_hub_operation.py b/tests/utils/test_hub_operation.py
new file mode 100644
index 00000000..f432a60c
--- /dev/null
+++ b/tests/utils/test_hub_operation.py
@@ -0,0 +1,50 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path as osp
+import unittest
+
+from maas_hub.maas_api import MaasApi
+from maas_hub.repository import Repository
+
+USER_NAME = 'maasadmin'
+PASSWORD = '12345678'
+
+
+class HubOperationTest(unittest.TestCase):
+
+    def setUp(self):
+        self.api = MaasApi()
+        # note this is temporary before official account management is ready
+        self.api.login(USER_NAME, PASSWORD)
+
+    @unittest.skip('to be used for local test only')
+    def test_model_repo_creation(self):
+        # change to proper model names before use
+        model_name = 'cv_unet_person-image-cartoon_compound-models'
+        model_chinese_name = '达摩卡通化模型'
+        model_org = 'damo'
+        try:
+            self.api.create_model(
+                owner=model_org,
+                name=model_name,
+                chinese_name=model_chinese_name,
+                visibility=5,  # 1-private, 5-public
+                license='apache-2.0')
+        # TODO: support proper name duplication checking
+        except KeyError as ke:
+            if ke.args[0] == 'name':
+                print(f'model {model_name} already exists, ignore')
+            else:
+                raise
+
+    # Note that this can be done via git operation once model repo
+    # has been created. Git-Op is the RECOMMENDED model upload approach
+    @unittest.skip('to be used for local test only')
+    def test_model_upload(self):
+        local_path = '/path/to/local/model/directory'
+        assert osp.exists(local_path), 'Local model directory does not exist.'
+        repo = Repository(local_dir=local_path)
+        repo.push_to_hub(commit_message='Upload model files')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/utils/test_registry.py b/tests/utils/test_registry.py
index 982b9f21..67e44f4e 100644
--- a/tests/utils/test_registry.py
+++ b/tests/utils/test_registry.py
@@ -1,8 +1,8 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import unittest
 
-from maas_lib.utils.constant import Tasks
-from maas_lib.utils.registry import Registry, build_from_cfg, default_group
+from modelscope.utils.constant import Tasks
+from modelscope.utils.registry import Registry, build_from_cfg, default_group
 
 
 class RegistryTest(unittest.TestCase):
diff --git a/tests/utils/test_type_assert.py b/tests/utils/test_type_assert.py
index 4ec9f2e5..5b62a269 100644
--- a/tests/utils/test_type_assert.py
+++ b/tests/utils/test_type_assert.py
@@ -3,7 +3,7 @@
 import unittest
 from typing import List, Union
 
-from maas_lib.utils.type_assert import type_assert
+from modelscope.utils.type_assert import type_assert
 
 
 class type_assertTest(unittest.TestCase):
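Finally, test_registry.py above exercises the Registry and build_from_cfg machinery that the pipeline tests rely on. A minimal sketch under the imports shown in that diff (the registry name, dummy class, and the group_key keyword are assumptions, not verified against the registry implementation):

from modelscope.utils.registry import Registry, build_from_cfg, default_group

MODELS = Registry('models')


@MODELS.register_module(module_name='dummy-model')
class DummyModel:
    pass


# Build an instance from a config dict that names the registered module.
cfg = dict(type='dummy-model')
model = build_from_cfg(cfg, MODELS, group_key=default_group)
assert isinstance(model, DummyModel)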