From 7798a6250a9045f9239e6646f49411cd11f40708 Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm"
Date: Tue, 2 Aug 2022 20:21:05 +0800
Subject: [PATCH] [to #43112692] standardized task names and outputs

1. task name and output definitions: [link](https://alidocs.dingtalk.com/i/nodes/KOEmgBoGwD78vd2bAry3VndLerP9b30a?nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA&iframeQuery=utm_source%3Dportal%26utm_medium%3Dportal_space_file_tree)
2. rearrange task definitions and add more output definitions for tasks

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9585469
---
 modelscope/metrics/builder.py | 4 +-
 .../image_color_enhance.py | 3 +-
 .../image_denoise/nafnet_for_image_denoise.py | 2 +-
 modelscope/outputs.py | 303 +++++++++++-------
 modelscope/pipelines/builder.py | 9 +-
 .../cv/image_color_enhance_pipeline.py | 2 +-
 .../pipelines/cv/image_denoise_pipeline.py | 2 +-
 .../pipelines/cv/image_matting_pipeline.py | 2 +
 .../cv/image_to_image_translation_pipeline.py | 3 +-
 modelscope/pipelines/util.py | 3 +-
 modelscope/utils/constant.py | 64 ++--
 tests/pipelines/test_builder.py | 11 +-
 .../pipelines/test_image2image_translation.py | 2 +-
 tests/pipelines/test_image_color_enhance.py | 4 +-
 tests/pipelines/test_image_denoise.py | 7 +-
 tests/pipelines/test_image_matting.py | 10 +-
 tests/utils/test_registry.py | 7 +-
 17 files changed, 263 insertions(+), 175 deletions(-)

diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py
index ab837ff0..5b9f962e 100644
--- a/modelscope/metrics/builder.py
+++ b/modelscope/metrics/builder.py
@@ -22,8 +22,8 @@ task_default_metrics = {
     Tasks.sentence_similarity: [Metrics.seq_cls_metric],
     Tasks.sentiment_classification: [Metrics.seq_cls_metric],
     Tasks.text_generation: [Metrics.text_gen_metric],
-    Tasks.image_denoise: [Metrics.image_denoise_metric],
-    Tasks.image_color_enhance: [Metrics.image_color_enhance_metric]
+    Tasks.image_denoising: [Metrics.image_denoise_metric],
+    Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric]
 }
 
 
diff --git a/modelscope/models/cv/image_color_enhance/image_color_enhance.py b/modelscope/models/cv/image_color_enhance/image_color_enhance.py
index d142e682..382cc152 100644
--- a/modelscope/models/cv/image_color_enhance/image_color_enhance.py
+++ b/modelscope/models/cv/image_color_enhance/image_color_enhance.py
@@ -17,7 +17,8 @@ logger = get_logger()
 
 __all__ = ['ImageColorEnhance']
 
-@MODELS.register_module(Tasks.image_color_enhance, module_name=Models.csrnet)
+@MODELS.register_module(
+    Tasks.image_color_enhancement, module_name=Models.csrnet)
 class ImageColorEnhance(TorchModel):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py b/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py
index 35f0eb5a..eaf5d0c5 100644
--- a/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py
+++ b/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py
@@ -19,7 +19,7 @@ logger = get_logger()
 
 __all__ = ['NAFNetForImageDenoise']
 
-@MODELS.register_module(Tasks.image_denoise, module_name=Models.nafnet)
+@MODELS.register_module(Tasks.image_denoising, module_name=Models.nafnet)
 class NAFNetForImageDenoise(TorchModel):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/outputs.py b/modelscope/outputs.py
index c28f2fb9..20254416 100644
--- a/modelscope/outputs.py
+++ b/modelscope/outputs.py
@@ -21,6 +21,7 @@ class OutputKeys(object):
     OUTPUT_IMG = 'output_img'
     OUTPUT_PCM = 'output_pcm'
     IMG_EMBEDDING = 
'img_embedding' + SPO_LIST = 'spo_list' TEXT_EMBEDDING = 'text_embedding' TRANSLATION = 'translation' RESPONSE = 'response' @@ -29,32 +30,21 @@ class OutputKeys(object): PROBABILITIES = 'probabilities' DIALOG_STATES = 'dialog_states' VIDEO_EMBEDDING = 'video_embedding' + UUID = 'uuid' + WORD = 'word' + KWS_LIST = 'kws_list' TASK_OUTPUTS = { # ============ vision tasks =================== - # image classification result for single sample - # { - # "scores": [0.9, 0.1, 0.05, 0.05] - # "labels": ["dog", "horse", "cow", "cat"], - # } - Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS], - Tasks.image_tagging: [OutputKeys.SCORES, OutputKeys.LABELS], - - # object detection result for single sample - # { - # "scores": [0.9, 0.1, 0.05, 0.05] - # "labels": ["dog", "horse", "cow", "cat"], - # "boxes": [ - # [x1, y1, x2, y2], - # [x1, y1, x2, y2], - # [x1, y1, x2, y2], - # ], - # } - Tasks.object_detection: - [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + # ocr detection result for single sample + # { + # "polygons": np.array with shape [num_text, 8], each polygon is + # [x1, y1, x2, y2, x3, y3, x4, y4] + # } + Tasks.ocr_detection: [OutputKeys.POLYGONS], # face detection result for single sample # { @@ -81,35 +71,79 @@ TASK_OUTPUTS = { # } Tasks.face_recognition: [OutputKeys.IMG_EMBEDDING], + # human detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["person", "person", "person", "person"], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # } + # + Tasks.human_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + + # face generation result for single sample + # { + # "output_img": np.array with shape(h, w, 3) + # } + Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG], + + # image classification result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # } + Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS], + + # object detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # } + Tasks.image_object_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + # instance segmentation result for single sample # { # "scores": [0.9, 0.1, 0.05, 0.05], # "labels": ["dog", "horse", "cow", "cat"], - # "boxes": [ - # np.array in bgr channel order + # "masks": [ + # np.array # 2D array containing only 0, 1 # ] # } Tasks.image_segmentation: - [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS], - # image generation/editing/matting result for single sample + # image matting result for single sample # { # "output_img": np.array with shape(h, w, 4) - # for matting or (h, w, 3) for general purpose # } - Tasks.image_editing: [OutputKeys.OUTPUT_IMG], Tasks.image_matting: [OutputKeys.OUTPUT_IMG], - Tasks.image_generation: [OutputKeys.OUTPUT_IMG], - Tasks.image_denoise: [OutputKeys.OUTPUT_IMG], - Tasks.image_colorization: [OutputKeys.OUTPUT_IMG], - Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG], + Tasks.protrait_matting: [OutputKeys.OUTPUT_IMG], + + # image editing task result for a single image + # {"output_img": np.array with shape (h, w, 3)} + Tasks.image_protrait_enhancement: [OutputKeys.OUTPUT_IMG], + Tasks.skin_retouching: [OutputKeys.OUTPUT_IMG], 
Tasks.image_super_resolution: [OutputKeys.OUTPUT_IMG], + Tasks.image_colorization: [OutputKeys.OUTPUT_IMG], + Tasks.image_color_enhancement: [OutputKeys.OUTPUT_IMG], + Tasks.image_denoising: [OutputKeys.OUTPUT_IMG], - # action recognition result for single video - # { - # "output_label": "abseiling" - # } - Tasks.action_recognition: [OutputKeys.LABELS], + # image generation task result for a single image + # {"output_img": np.array with shape (h, w, 3)} + Tasks.image_to_image_generation: [OutputKeys.OUTPUT_IMG], + Tasks.image_to_image_translation: [OutputKeys.OUTPUT_IMG], + Tasks.image_style_transfer: [OutputKeys.OUTPUT_IMG], + Tasks.image_portrait_stylization: [OutputKeys.OUTPUT_IMG], # live category recognition result for single video # { @@ -117,28 +151,19 @@ TASK_OUTPUTS = { # "labels": ['女装/女士精品>>棉衣/棉服', '女装/女士精品>>牛仔裤', '女装/女士精品>>裤子>>休闲裤'], # } Tasks.live_category: [OutputKeys.SCORES, OutputKeys.LABELS], - # video category recognition result for single video - # { - # "scores": [0.7716429233551025] - # "labels": ['生活>>好物推荐'], - # } - Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS], - # pose estimation result for single sample + # action recognition result for single video # { - # "poses": np.array with shape [num_pose, num_keypoint, 3], - # each keypoint is a array [x, y, score] - # "boxes": np.array with shape [num_pose, 4], each box is - # [x1, y1, x2, y2] + # "output_label": "abseiling" # } - Tasks.pose_estimation: [OutputKeys.POSES, OutputKeys.BOXES], + Tasks.action_recognition: [OutputKeys.LABELS], - # ocr detection result for single sample + # video category recognition result for single video # { - # "polygons": np.array with shape [num_text, 8], each polygon is - # [x1, y1, x2, y2, x3, y3, x4, y4] + # "scores": [0.7716429233551025] + # "labels": ['生活>>好物推荐'], # } - Tasks.ocr_detection: [OutputKeys.POLYGONS], + Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS], # image embedding result for a single image # { @@ -152,11 +177,11 @@ TASK_OUTPUTS = { # } Tasks.video_embedding: [OutputKeys.VIDEO_EMBEDDING], - # image_color_enhance result for a single sample + # virtual_try_on result for a single sample # { - # "output_img": np.ndarray with shape [height, width, 3], uint8 + # "output_img": np.ndarray with shape [height, width, 3] # } - Tasks.image_color_enhance: [OutputKeys.OUTPUT_IMG], + Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG], # ============ nlp tasks =================== @@ -167,33 +192,6 @@ TASK_OUTPUTS = { # } Tasks.text_classification: [OutputKeys.SCORES, OutputKeys.LABELS], - # text generation result for single sample - # { - # "text": "this is the text generated by a model." - # } - Tasks.text_generation: [OutputKeys.TEXT], - - # fill mask result for single sample - # { - # "text": "this is the text which masks filled by model." 
- # } - Tasks.fill_mask: [OutputKeys.TEXT], - - # word segmentation result for single sample - # { - # "output": "今天 天气 不错 , 适合 出去 游玩" - # } - Tasks.word_segmentation: [OutputKeys.OUTPUT], - - # named entity recognition result for single sample - # { - # "output": [ - # {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"}, - # {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"} - # ] - # } - Tasks.named_entity_recognition: [OutputKeys.OUTPUT], - # sentence similarity result for single sample # { # "scores": 0.9 @@ -201,11 +199,12 @@ TASK_OUTPUTS = { # } Tasks.sentence_similarity: [OutputKeys.SCORES, OutputKeys.LABELS], - # translation result for a source sentence + # nli result for single sample # { - # "translation": “北京是中国的首都” + # "labels": ["happy", "sad", "calm", "angry"], + # "scores": [0.9, 0.1, 0.05, 0.05] # } - Tasks.translation: [OutputKeys.TRANSLATION], + Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS], # sentiment classification result for single sample # { @@ -221,14 +220,78 @@ TASK_OUTPUTS = { # } Tasks.zero_shot_classification: [OutputKeys.SCORES, OutputKeys.LABELS], - # nli result for single sample + # relation extraction result for a single sample + # { + # "uuid": "人生信息-1", + # "text": "《父老乡亲》是由是由由中国人民解放军海政文工团创作的军旅歌曲,石顺义作词,王锡仁作曲,范琳琳演唱", + # "spo_list": [{"subject": "石顺义", "predicate": "国籍", "object": "中国"}] + # } + Tasks.relation_extraction: + [OutputKeys.UUID, OutputKeys.TEXT, OutputKeys.SPO_LIST], + + # translation result for a source sentence # { - # "labels": ["happy", "sad", "calm", "angry"], - # "scores": [0.9, 0.1, 0.05, 0.05] + # "translation": “北京是中国的首都” # } - Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS], + Tasks.translation: [OutputKeys.TRANSLATION], + + # word segmentation result for single sample + # { + # "output": "今天 天气 不错 , 适合 出去 游玩" + # } + Tasks.word_segmentation: [OutputKeys.OUTPUT], + + # part-of-speech result for single sample + # [ + # {'word': '诸葛', 'label': 'PROPN'}, + # {'word': '亮', 'label': 'PROPN'}, + # {'word': '发明', 'label': 'VERB'}, + # {'word': '八', 'label': 'NUM'}, + # {'word': '阵', 'label': 'NOUN'}, + # {'word': '图', 'label': 'PART'}, + # {'word': '以', 'label': 'ADV'}, + # {'word': '利', 'label': 'VERB'}, + # {'word': '立营', 'label': 'VERB'}, + # {'word': '练兵', 'label': 'VERB'}, + # {'word': '.', 'label': 'PUNCT'} + # ] + # TODO @wenmeng.zwm support list of result check + Tasks.part_of_speech: [OutputKeys.WORD, OutputKeys.LABEL], + + # named entity recognition result for single sample + # { + # "output": [ + # {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"}, + # {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"} + # ] + # } + Tasks.named_entity_recognition: [OutputKeys.OUTPUT], - # dialog intent prediction result for single sample + # text_error_correction result for a single sample + # { + # "output": "我想吃苹果" + # } + Tasks.text_error_correction: [OutputKeys.OUTPUT], + + # text generation result for single sample + # { + # "text": "this is the text generated by a model." + # } + Tasks.text_generation: [OutputKeys.TEXT], + + # text feature extraction for single sample + # { + # "text_embedding": np.array with shape [1, D] + # } + Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING], + + # fill mask result for single sample + # { + # "text": "this is the text which masks filled by model." 
+ # } + Tasks.fill_mask: [OutputKeys.TEXT], + + # (Deprecated) dialog intent prediction result for single sample # {'pred': array([2.62349960e-03, 4.12110658e-03, 4.12748595e-05, 3.77560973e-05, # 1.08599677e-04, 1.72710388e-05, 2.95618793e-05, 1.93638436e-04, # 6.45841064e-05, 1.15997791e-04, 5.11605394e-05, 9.87020373e-01, @@ -252,11 +315,11 @@ TASK_OUTPUTS = { Tasks.dialog_intent_prediction: [OutputKeys.PREDICTION, OutputKeys.LABEL_POS, OutputKeys.LABEL], - # dialog modeling prediction result for single sample + # (Deprecated) dialog modeling prediction result for single sample # sys : ['you', 'are', 'welcome', '.', 'have', 'a', 'great', 'day', '!'] Tasks.dialog_modeling: [OutputKeys.RESPONSE], - # dialog state tracking result for single sample + # (Deprecated) dialog state tracking result for single sample # { # "dialog_states": { # "taxi-leaveAt": "none", @@ -294,6 +357,9 @@ TASK_OUTPUTS = { Tasks.dialog_state_tracking: [OutputKeys.DIALOG_STATES], # ============ audio tasks =================== + # asr result for single sample + # { "text": "每一天都要快乐喔"} + Tasks.auto_speech_recognition: [OutputKeys.TEXT], # audio processed for single file in PCM format # { @@ -303,30 +369,19 @@ TASK_OUTPUTS = { Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM], Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM], - # ============ multi-modal tasks =================== - - # image caption result for single sample + # text_to_speech result for a single sample # { - # "caption": "this is an image caption text." + # "output_pcm": {"input_label" : np.ndarray with shape [D]} # } - Tasks.image_captioning: [OutputKeys.CAPTION], + Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM], - # multi-modal embedding result for single sample - # { - # "img_embedding": np.array with shape [1, D], - # "text_embedding": np.array with shape [1, D] - # } - Tasks.multi_modal_embedding: - [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING], + # ============ multi-modal tasks =================== - # generative multi-modal embedding result for single sample + # image caption result for single sample # { - # "img_embedding": np.array with shape [1, D], - # "text_embedding": np.array with shape [1, D], # "caption": "this is an image caption text." # } - Tasks.generative_multi_modal_embedding: - [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION], + Tasks.image_captioning: [OutputKeys.CAPTION], # visual grounding result for single sample # { @@ -350,25 +405,31 @@ TASK_OUTPUTS = { # "output_pcm": {"input_label" : np.ndarray with shape [D]} # } Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM], - # virtual_try_on result for a single sample + + # multi-modal embedding result for single sample # { - # "output_img": np.ndarray with shape [height, width, 3] + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D] # } - Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG], - # visual_question_answering result for a single sample + Tasks.multi_modal_embedding: + [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING], + + # generative multi-modal embedding result for single sample # { - # "text": "this is the text generated by a model." + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D], + # "caption": "this is an image caption text." # } + Tasks.generative_multi_modal_embedding: + [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION], + + # VQA result for a sample + # {"text": "this is a text answser. 
"} Tasks.visual_question_answering: [OutputKeys.TEXT], - # auto_speech_recognition result for a single sample - # { - # "text": "每天都要快乐喔" - # } - Tasks.auto_speech_recognition: [OutputKeys.TEXT], - # text_error_correction result for a single sample # { - # "output": "我想吃苹果" + # "scores": [0.9, 0.1, 0.1], + # "labels": ["entailment", "contradiction", "neutral"] # } - Tasks.text_error_correction: [OutputKeys.OUTPUT] + Tasks.visual_entailment: [OutputKeys.SCORES, OutputKeys.LABELS], } diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index d5cfba3d..4cf1924b 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -39,8 +39,8 @@ DEFAULT_MODEL_FOR_PIPELINE = { 'damo/cv_resnet18_human-detection'), Tasks.image_object_detection: (Pipelines.object_detection, 'damo/cv_vit_object-detection_coco'), - Tasks.image_denoise: (Pipelines.image_denoise, - 'damo/cv_nafnet_image-denoise_sidd'), + Tasks.image_denoising: (Pipelines.image_denoise, + 'damo/cv_nafnet_image-denoise_sidd'), Tasks.text_classification: (Pipelines.sentiment_analysis, 'damo/bert-base-sst2'), Tasks.text_generation: (Pipelines.text_generation, @@ -94,8 +94,9 @@ DEFAULT_MODEL_FOR_PIPELINE = { Tasks.video_multi_modal_embedding: (Pipelines.video_multi_modal_embedding, 'damo/multi_modal_clip_vtretrival_msrvtt_53'), - Tasks.image_color_enhance: (Pipelines.image_color_enhance, - 'damo/cv_csrnet_image-color-enhance-models'), + Tasks.image_color_enhancement: + (Pipelines.image_color_enhance, + 'damo/cv_csrnet_image-color-enhance-models'), Tasks.virtual_try_on: (Pipelines.virtual_try_on, 'damo/cv_daflow_virtual-try-on_base'), Tasks.image_colorization: (Pipelines.image_colorization, diff --git a/modelscope/pipelines/cv/image_color_enhance_pipeline.py b/modelscope/pipelines/cv/image_color_enhance_pipeline.py index b9007f77..40777d60 100644 --- a/modelscope/pipelines/cv/image_color_enhance_pipeline.py +++ b/modelscope/pipelines/cv/image_color_enhance_pipeline.py @@ -18,7 +18,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.image_color_enhance, module_name=Pipelines.image_color_enhance) + Tasks.image_color_enhancement, module_name=Pipelines.image_color_enhance) class ImageColorEnhancePipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/cv/image_denoise_pipeline.py b/modelscope/pipelines/cv/image_denoise_pipeline.py index 0c6c878a..64aa3bc9 100644 --- a/modelscope/pipelines/cv/image_denoise_pipeline.py +++ b/modelscope/pipelines/cv/image_denoise_pipeline.py @@ -19,7 +19,7 @@ __all__ = ['ImageDenoisePipeline'] @PIPELINES.register_module( - Tasks.image_denoise, module_name=Pipelines.image_denoise) + Tasks.image_denoising, module_name=Pipelines.image_denoise) class ImageDenoisePipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 2faaec37..f440440d 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -15,6 +15,8 @@ from modelscope.utils.logger import get_logger logger = get_logger() +@PIPELINES.register_module( + Tasks.protrait_matting, module_name=Pipelines.image_matting) @PIPELINES.register_module( Tasks.image_matting, module_name=Pipelines.image_matting) class ImageMattingPipeline(Pipeline): diff --git a/modelscope/pipelines/cv/image_to_image_translation_pipeline.py b/modelscope/pipelines/cv/image_to_image_translation_pipeline.py index a9f83e02..78901c9b 100644 --- 
a/modelscope/pipelines/cv/image_to_image_translation_pipeline.py +++ b/modelscope/pipelines/cv/image_to_image_translation_pipeline.py @@ -34,7 +34,8 @@ def save_grid(imgs, filename, nrow=5): @PIPELINES.register_module( - Tasks.image_generation, module_name=Pipelines.image2image_translation) + Tasks.image_to_image_translation, + module_name=Pipelines.image2image_translation) class Image2ImageTranslationPipeline(Pipeline): def __init__(self, model: str, **kwargs): diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py index 03383bc1..2c2c7751 100644 --- a/modelscope/pipelines/util.py +++ b/modelscope/pipelines/util.py @@ -34,7 +34,8 @@ def is_official_hub_path(path: Union[str, List], try: _ = HubApi().get_model(path, revision=revision) return True - except Exception: + except Exception as e: + logger.warning(f'get model exception: {e}') return False if isinstance(path, str): diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 73c55152..20311fba 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -14,42 +14,60 @@ class Fields(object): class CVTasks(object): - # vision tasks - image_to_text = 'image-to-text' - pose_estimation = 'pose-estimation' + # ocr + ocr_detection = 'ocr-detection' + ocr_recognition = 'ocr-recognition' + + # human face body related + face_detection = 'face-detection' + face_recognition = 'face-recognition' + human_detection = 'human-detection' + human_object_interaction = 'human-object-interaction' + face_image_generation = 'face-image-generation' + image_classification = 'image-classification' - image_tagging = 'image-tagging' - object_detection = 'object-detection' + image_multilabel_classification = 'image-multilabel-classification' + image_classification_imagenet = 'image-classification-imagenet' + image_classification_dailylife = 'image-classification-dailylife' + image_object_detection = 'image-object-detection' - human_detection = 'human-detection' + image_segmentation = 'image-segmentation' - image_editing = 'image-editing' - image_generation = 'image-generation' image_matting = 'image-matting' - image_denoise = 'image-denoise' - ocr_detection = 'ocr-detection' - action_recognition = 'action-recognition' - video_embedding = 'video-embedding' - face_detection = 'face-detection' - face_recognition = 'face-recognition' - image_color_enhance = 'image-color-enhance' - virtual_try_on = 'virtual-try-on' + protrait_matting = 'protrait-matting' + + # image editting + image_protrait_enhancement = 'image-protrait-enhancement' + skin_retouching = 'skin-retouching' + image_super_resolution = 'image-super-resolution' image_colorization = 'image-colorization' - face_image_generation = 'face-image-generation' + image_color_enhancement = 'image-color-enhancement' + image_denoising = 'image-denoising' + + # image generation + image_to_image_translation = 'image-to-image-translation' + image_to_image_generation = 'image-to-image-generation' image_style_transfer = 'image-style-transfer' - image_super_resolution = 'image-super-resolution' + image_portrait_stylization = 'image-portrait-stylization' + + image_embedding = 'image-embedding' + product_retrieval_embedding = 'product-retrieval-embedding' + + # video recognition live_category = 'live-category' + action_recognition = 'action-recognition' video_category = 'video-category' - image_classification_imagenet = 'image-classification-imagenet' - image_classification_dailylife = 'image-classification-dailylife' - image_portrait_stylization = 
'image-portrait-stylization' - image_to_image_generation = 'image-to-image-generation' + + video_embedding = 'video-embedding' + + virtual_try_on = 'virtual-try-on' class NLPTasks(object): # nlp tasks word_segmentation = 'word-segmentation' + part_of_speech = 'part-of-speech' named_entity_recognition = 'named-entity-recognition' nli = 'nli' sentiment_classification = 'sentiment-classification' @@ -66,7 +84,7 @@ class NLPTasks(object): dialog_intent_prediction = 'dialog-intent-prediction' dialog_state_tracking = 'dialog-state-tracking' table_question_answering = 'table-question-answering' - feature_extraction = 'feature-extraction' + sentence_embedding = 'sentence-embedding' fill_mask = 'fill-mask' summarization = 'summarization' question_answering = 'question-answering' diff --git a/tests/pipelines/test_builder.py b/tests/pipelines/test_builder.py index a91a7391..baef5a6f 100644 --- a/tests/pipelines/test_builder.py +++ b/tests/pipelines/test_builder.py @@ -21,7 +21,7 @@ logger = get_logger() @PIPELINES.register_module( - group_key=Tasks.image_tagging, module_name='custom_single_model') + group_key=Tasks.image_classification, module_name='custom_single_model') class CustomSingleModelPipeline(Pipeline): def __init__(self, @@ -38,7 +38,7 @@ class CustomSingleModelPipeline(Pipeline): @PIPELINES.register_module( - group_key=Tasks.image_tagging, module_name='model1_model2') + group_key=Tasks.image_classification, module_name='model1_model2') class CustomMultiModelPipeline(Pipeline): def __init__(self, @@ -64,7 +64,7 @@ class PipelineInterfaceTest(unittest.TestCase): cfg_file = os.path.join(dirname, ModelFile.CONFIGURATION) cfg = { ConfigFields.framework: Frameworks.torch, - ConfigFields.task: Tasks.image_tagging, + ConfigFields.task: Tasks.image_classification, ConfigFields.pipeline: { 'type': pipeline_name, } @@ -77,12 +77,13 @@ class PipelineInterfaceTest(unittest.TestCase): self.prepare_dir('/tmp/model2', 'model1_model2') def test_single_model(self): - pipe = pipeline(Tasks.image_tagging, model='/tmp/custom_single_model') + pipe = pipeline( + Tasks.image_classification, model='/tmp/custom_single_model') assert isinstance(pipe, CustomSingleModelPipeline) def test_multi_model(self): pipe = pipeline( - Tasks.image_tagging, model=['/tmp/model1', '/tmp/model2']) + Tasks.image_classification, model=['/tmp/model1', '/tmp/model2']) assert isinstance(pipe, CustomMultiModelPipeline) diff --git a/tests/pipelines/test_image2image_translation.py b/tests/pipelines/test_image2image_translation.py index 24766d25..8380af75 100644 --- a/tests/pipelines/test_image2image_translation.py +++ b/tests/pipelines/test_image2image_translation.py @@ -24,7 +24,7 @@ class Image2ImageTranslationTest(unittest.TestCase): just like the following code. 
""" img2img_gen_pipeline = pipeline( - Tasks.image_generation, + Tasks.image_to_image_translation, model='damo/cv_latent_diffusion_image2image_translation') result = img2img_gen_pipeline( ('data/test/images/img2img_input_mask.png', diff --git a/tests/pipelines/test_image_color_enhance.py b/tests/pipelines/test_image_color_enhance.py index ae22d65e..62ffbcb9 100644 --- a/tests/pipelines/test_image_color_enhance.py +++ b/tests/pipelines/test_image_color_enhance.py @@ -27,13 +27,13 @@ class ImageColorEnhanceTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_modelhub(self): img_color_enhance = pipeline( - Tasks.image_color_enhance, model=self.model_id) + Tasks.image_color_enhancement, model=self.model_id) self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub_default_model(self): - img_color_enhance = pipeline(Tasks.image_color_enhance) + img_color_enhance = pipeline(Tasks.image_color_enhancement) self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') diff --git a/tests/pipelines/test_image_denoise.py b/tests/pipelines/test_image_denoise.py index b53f2e42..d3e0af24 100644 --- a/tests/pipelines/test_image_denoise.py +++ b/tests/pipelines/test_image_denoise.py @@ -30,7 +30,7 @@ class ImageDenoiseTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) - pipeline_ins = pipeline(task=Tasks.image_denoise, model=model) + pipeline_ins = pipeline(task=Tasks.image_denoising, model=model) denoise_img = pipeline_ins( input=self.demo_image_path)[OutputKeys.OUTPUT_IMG] denoise_img = Image.fromarray(denoise_img) @@ -39,7 +39,8 @@ class ImageDenoiseTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline(task=Tasks.image_denoise, model=self.model_id) + pipeline_ins = pipeline( + task=Tasks.image_denoising, model=self.model_id) denoise_img = pipeline_ins( input=self.demo_image_path)[OutputKeys.OUTPUT_IMG] denoise_img = Image.fromarray(denoise_img) @@ -48,7 +49,7 @@ class ImageDenoiseTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): - pipeline_ins = pipeline(task=Tasks.image_denoise) + pipeline_ins = pipeline(task=Tasks.image_denoising) denoise_img = pipeline_ins( input=self.demo_image_path)[OutputKeys.OUTPUT_IMG] denoise_img = Image.fromarray(denoise_img) diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index af8ace50..c5309978 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -26,7 +26,7 @@ class ImageMattingTest(unittest.TestCase): model_file = osp.join(tmp_dir, ModelFile.TF_GRAPH_FILE) with open(model_file, 'wb') as ofile: ofile.write(File.read(model_path)) - img_matting = pipeline(Tasks.image_matting, model=tmp_dir) + img_matting = pipeline(Tasks.protrait_matting, model=tmp_dir) result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) @@ -38,7 +38,7 @@ class ImageMattingTest(unittest.TestCase): # input_location = '/dir/to/images' dataset = MsDataset.load(input_location, target='image') - img_matting = 
pipeline(Tasks.image_matting, model=self.model_id) + img_matting = pipeline(Tasks.protrait_matting, model=self.model_id) # note that for dataset output, the inference-output is a Generator that can be iterated. result = img_matting(dataset) cv2.imwrite('result.png', next(result)[OutputKeys.OUTPUT_IMG]) @@ -46,7 +46,7 @@ class ImageMattingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_modelhub(self): - img_matting = pipeline(Tasks.image_matting, model=self.model_id) + img_matting = pipeline(Tasks.protrait_matting, model=self.model_id) result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) @@ -54,7 +54,7 @@ class ImageMattingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub_default_model(self): - img_matting = pipeline(Tasks.image_matting) + img_matting = pipeline(Tasks.protrait_matting) result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) @@ -67,7 +67,7 @@ class ImageMattingTest(unittest.TestCase): namespace='damotest', split='test', target='file') - img_matting = pipeline(Tasks.image_matting, model=self.model_id) + img_matting = pipeline(Tasks.protrait_matting, model=self.model_id) result = img_matting(dataset) for i in range(2): cv2.imwrite(f'result_{i}.png', next(result)[OutputKeys.OUTPUT_IMG]) diff --git a/tests/utils/test_registry.py b/tests/utils/test_registry.py index 67e44f4e..0a37101d 100644 --- a/tests/utils/test_registry.py +++ b/tests/utils/test_registry.py @@ -42,12 +42,13 @@ class RegistryTest(unittest.TestCase): MODELS.get('Bert', Tasks.sentiment_analysis) is BertForSentimentAnalysis) - @MODELS.register_module(Tasks.object_detection) + @MODELS.register_module(Tasks.image_object_detection) class DETR(object): pass - self.assertTrue(Tasks.object_detection in MODELS.modules) - self.assertTrue(MODELS.get('DETR', Tasks.object_detection) is DETR) + self.assertTrue(Tasks.image_object_detection in MODELS.modules) + self.assertTrue( + MODELS.get('DETR', Tasks.image_object_detection) is DETR) self.assertEqual(len(MODELS.modules), 4)
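
For reference, a minimal usage sketch of the renamed task constants and standardized output keys (assumptions: a ModelScope installation with this patch applied; the model ID is the default registered for Tasks.image_denoising in pipelines/builder.py above; the input path is a placeholder):

```python
# Minimal sketch, not part of the patch: exercises the renamed
# Tasks.image_denoising entry and the standardized OutputKeys.
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Default model for image denoising as registered in pipelines/builder.py.
denoise = pipeline(
    Tasks.image_denoising, model='damo/cv_nafnet_image-denoise_sidd')

# 'path/to/noisy.png' is a placeholder input image path.
result = denoise('path/to/noisy.png')

# Image editing tasks in TASK_OUTPUTS return an 'output_img' ndarray.
print(result[OutputKeys.OUTPUT_IMG].shape)
```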