1. Task names and output definitions: [link](https://alidocs.dingtalk.com/i/nodes/KOEmgBoGwD78vd2bAry3VndLerP9b30a?nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA&iframeQuery=utm_source%3Dportal%26utm_medium%3Dportal_space_file_tree)
2. Rearrange the task definitions and add more output definitions for tasks. Code review: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9585469master
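For reviewers, a minimal sketch of what the renames mean at call sites. The task constants, model ids, and the test image path are taken from the diff below; the import paths are the library's usual ones, and the denoise input path is a placeholder:

```python
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Old: Tasks.image_denoise / Tasks.image_color_enhance
denoise = pipeline(
    task=Tasks.image_denoising, model='damo/cv_nafnet_image-denoise_sidd')
enhance = pipeline(
    task=Tasks.image_color_enhancement,
    model='damo/cv_csrnet_image-color-enhance-models')

# Both tasks return the processed image under OutputKeys.OUTPUT_IMG.
denoised = denoise('data/test/images/noisy.png')[OutputKeys.OUTPUT_IMG]  # placeholder path
enhanced = enhance('data/test/images/image_color_enhance.png')[OutputKeys.OUTPUT_IMG]
```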
```diff
@@ -22,8 +22,8 @@ task_default_metrics = {
     Tasks.sentence_similarity: [Metrics.seq_cls_metric],
     Tasks.sentiment_classification: [Metrics.seq_cls_metric],
     Tasks.text_generation: [Metrics.text_gen_metric],
-    Tasks.image_denoise: [Metrics.image_denoise_metric],
-    Tasks.image_color_enhance: [Metrics.image_color_enhance_metric]
+    Tasks.image_denoising: [Metrics.image_denoise_metric],
+    Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric]
 }
@@ -17,7 +17,8 @@ logger = get_logger()
 __all__ = ['ImageColorEnhance']
-@MODELS.register_module(Tasks.image_color_enhance, module_name=Models.csrnet)
+@MODELS.register_module(
+    Tasks.image_color_enhancement, module_name=Models.csrnet)
 class ImageColorEnhance(TorchModel):
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -19,7 +19,7 @@ logger = get_logger()
 __all__ = ['NAFNetForImageDenoise']
-@MODELS.register_module(Tasks.image_denoise, module_name=Models.nafnet)
+@MODELS.register_module(Tasks.image_denoising, module_name=Models.nafnet)
 class NAFNetForImageDenoise(TorchModel):
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -21,6 +21,7 @@ class OutputKeys(object):
     OUTPUT_IMG = 'output_img'
     OUTPUT_PCM = 'output_pcm'
     IMG_EMBEDDING = 'img_embedding'
+    SPO_LIST = 'spo_list'
     TEXT_EMBEDDING = 'text_embedding'
     TRANSLATION = 'translation'
     RESPONSE = 'response'
@@ -29,32 +30,21 @@ class OutputKeys(object):
     PROBABILITIES = 'probabilities'
     DIALOG_STATES = 'dialog_states'
     VIDEO_EMBEDDING = 'video_embedding'
+    UUID = 'uuid'
+    WORD = 'word'
+    KWS_LIST = 'kws_list'
 TASK_OUTPUTS = {
     # ============ vision tasks ===================
-    # image classification result for single sample
-    # {
-    #   "scores": [0.9, 0.1, 0.05, 0.05]
-    #   "labels": ["dog", "horse", "cow", "cat"],
-    # }
-    Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
-    Tasks.image_tagging: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # object detection result for single sample
-    # {
-    #   "scores": [0.9, 0.1, 0.05, 0.05]
-    #   "labels": ["dog", "horse", "cow", "cat"],
-    #   "boxes": [
-    #       [x1, y1, x2, y2],
-    #       [x1, y1, x2, y2],
-    #       [x1, y1, x2, y2],
-    #   ],
-    # }
-    Tasks.object_detection:
-    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
+    # ocr detection result for single sample
+    # {
+    #   "polygons": np.array with shape [num_text, 8], each polygon is
+    #       [x1, y1, x2, y2, x3, y3, x4, y4]
+    # }
+    Tasks.ocr_detection: [OutputKeys.POLYGONS],
     # face detection result for single sample
     # {
@@ -81,35 +71,79 @@ TASK_OUTPUTS = {
     # }
     Tasks.face_recognition: [OutputKeys.IMG_EMBEDDING],
+    # human detection result for single sample
+    # {
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    #   "labels": ["person", "person", "person", "person"],
+    #   "boxes": [
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #   ],
+    # }
+    #
+    Tasks.human_detection:
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
+    # face generation result for single sample
+    # {
+    #   "output_img": np.array with shape(h, w, 3)
+    # }
+    Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG],
+    # image classification result for single sample
+    # {
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    # }
+    Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
+    # object detection result for single sample
+    # {
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    #   "boxes": [
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #   ],
+    # }
+    Tasks.image_object_detection:
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
     # instance segmentation result for single sample
     # {
     #   "scores": [0.9, 0.1, 0.05, 0.05],
     #   "labels": ["dog", "horse", "cow", "cat"],
-    #   "boxes": [
-    #       np.array in bgr channel order
+    #   "masks": [
+    #       np.array # 2D array containing only 0, 1
+    #   ]
     # }
     Tasks.image_segmentation:
-    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS],
-    # image generation/editing/matting result for single sample
+    # image matting result for single sample
     # {
     #   "output_img": np.array with shape(h, w, 4)
-    #      for matting or (h, w, 3) for general purpose
     # }
-    Tasks.image_editing: [OutputKeys.OUTPUT_IMG],
     Tasks.image_matting: [OutputKeys.OUTPUT_IMG],
-    Tasks.image_generation: [OutputKeys.OUTPUT_IMG],
-    Tasks.image_denoise: [OutputKeys.OUTPUT_IMG],
-    Tasks.image_colorization: [OutputKeys.OUTPUT_IMG],
-    Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG],
+    Tasks.protrait_matting: [OutputKeys.OUTPUT_IMG],
+    # image editing task result for a single image
+    # {"output_img": np.array with shape (h, w, 3)}
+    Tasks.image_protrait_enhancement: [OutputKeys.OUTPUT_IMG],
+    Tasks.skin_retouching: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_super_resolution: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_colorization: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_color_enhancement: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_denoising: [OutputKeys.OUTPUT_IMG],
-    # action recognition result for single video
-    # {
-    #   "output_label": "abseiling"
-    # }
-    Tasks.action_recognition: [OutputKeys.LABELS],
+    # image generation task result for a single image
+    # {"output_img": np.array with shape (h, w, 3)}
+    Tasks.image_to_image_generation: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_to_image_translation: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_style_transfer: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_portrait_stylization: [OutputKeys.OUTPUT_IMG],
     # live category recognition result for single video
     # {
@@ -117,28 +151,19 @@ TASK_OUTPUTS = {
     #   "labels": ['女装/女士精品>>棉衣/棉服', '女装/女士精品>>牛仔裤', '女装/女士精品>>裤子>>休闲裤'],
     # }
     Tasks.live_category: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # video category recognition result for single video
-    # {
-    #   "scores": [0.7716429233551025]
-    #   "labels": ['生活>>好物推荐'],
-    # }
-    Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # pose estimation result for single sample
+    # action recognition result for single video
     # {
-    #   "poses": np.array with shape [num_pose, num_keypoint, 3],
-    #       each keypoint is a array [x, y, score]
-    #   "boxes": np.array with shape [num_pose, 4], each box is
-    #       [x1, y1, x2, y2]
+    #   "output_label": "abseiling"
     # }
-    Tasks.pose_estimation: [OutputKeys.POSES, OutputKeys.BOXES],
+    Tasks.action_recognition: [OutputKeys.LABELS],
-    # ocr detection result for single sample
+    # video category recognition result for single video
     # {
-    #   "polygons": np.array with shape [num_text, 8], each polygon is
-    #       [x1, y1, x2, y2, x3, y3, x4, y4]
+    #   "scores": [0.7716429233551025]
+    #   "labels": ['生活>>好物推荐'],
     # }
-    Tasks.ocr_detection: [OutputKeys.POLYGONS],
+    Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS],
     # image embedding result for a single image
     # {
@@ -152,11 +177,11 @@ TASK_OUTPUTS = {
     # }
     Tasks.video_embedding: [OutputKeys.VIDEO_EMBEDDING],
-    # image_color_enhance result for a single sample
+    # virtual_try_on result for a single sample
     # {
-    #   "output_img": np.ndarray with shape [height, width, 3], uint8
+    #   "output_img": np.ndarray with shape [height, width, 3]
     # }
-    Tasks.image_color_enhance: [OutputKeys.OUTPUT_IMG],
+    Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG],
     # ============ nlp tasks ===================
@@ -167,33 +192,6 @@ TASK_OUTPUTS = {
     # }
     Tasks.text_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # text generation result for single sample
-    # {
-    #   "text": "this is the text generated by a model."
-    # }
-    Tasks.text_generation: [OutputKeys.TEXT],
-    # fill mask result for single sample
-    # {
-    #   "text": "this is the text which masks filled by model."
-    # }
-    Tasks.fill_mask: [OutputKeys.TEXT],
-    # word segmentation result for single sample
-    # {
-    #   "output": "今天 天气 不错 , 适合 出去 游玩"
-    # }
-    Tasks.word_segmentation: [OutputKeys.OUTPUT],
-    # named entity recognition result for single sample
-    # {
-    #   "output": [
-    #     {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"},
-    #     {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"}
-    #   ]
-    # }
-    Tasks.named_entity_recognition: [OutputKeys.OUTPUT],
     # sentence similarity result for single sample
     # {
     #   "scores": 0.9
@@ -201,11 +199,12 @@ TASK_OUTPUTS = {
     # }
     Tasks.sentence_similarity: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # translation result for a source sentence
+    # nli result for single sample
     # {
-    #   "translation": “北京是中国的首都”
+    #   "labels": ["happy", "sad", "calm", "angry"],
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
     # }
-    Tasks.translation: [OutputKeys.TRANSLATION],
+    Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS],
     # sentiment classification result for single sample
     # {
@@ -221,14 +220,78 @@ TASK_OUTPUTS = {
     # }
     Tasks.zero_shot_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # nli result for single sample
-    # {
-    #   "labels": ["happy", "sad", "calm", "angry"],
-    #   "scores": [0.9, 0.1, 0.05, 0.05]
-    # }
-    Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS],
+    # relation extraction result for a single sample
+    # {
+    #   "uuid": "人生信息-1",
+    #   "text": "《父老乡亲》是由中国人民解放军海政文工团创作的军旅歌曲,石顺义作词,王锡仁作曲,范琳琳演唱",
+    #   "spo_list": [{"subject": "石顺义", "predicate": "国籍", "object": "中国"}]
+    # }
+    Tasks.relation_extraction:
+    [OutputKeys.UUID, OutputKeys.TEXT, OutputKeys.SPO_LIST],
+    # translation result for a source sentence
+    # {
+    #   "translation": “北京是中国的首都”
+    # }
+    Tasks.translation: [OutputKeys.TRANSLATION],
+    # word segmentation result for single sample
+    # {
+    #   "output": "今天 天气 不错 , 适合 出去 游玩"
+    # }
+    Tasks.word_segmentation: [OutputKeys.OUTPUT],
+    # part-of-speech result for single sample
+    # [
+    #   {'word': '诸葛', 'label': 'PROPN'},
+    #   {'word': '亮', 'label': 'PROPN'},
+    #   {'word': '发明', 'label': 'VERB'},
+    #   {'word': '八', 'label': 'NUM'},
+    #   {'word': '阵', 'label': 'NOUN'},
+    #   {'word': '图', 'label': 'PART'},
+    #   {'word': '以', 'label': 'ADV'},
+    #   {'word': '利', 'label': 'VERB'},
+    #   {'word': '立营', 'label': 'VERB'},
+    #   {'word': '练兵', 'label': 'VERB'},
+    #   {'word': '.', 'label': 'PUNCT'}
+    # ]
+    # TODO @wenmeng.zwm support list of result check
+    Tasks.part_of_speech: [OutputKeys.WORD, OutputKeys.LABEL],
+    # named entity recognition result for single sample
+    # {
+    #   "output": [
+    #     {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"},
+    #     {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"}
+    #   ]
+    # }
+    Tasks.named_entity_recognition: [OutputKeys.OUTPUT],
-    # dialog intent prediction result for single sample
+    # text_error_correction result for a single sample
+    # {
+    #   "output": "我想吃苹果"
+    # }
+    Tasks.text_error_correction: [OutputKeys.OUTPUT],
+    # text generation result for single sample
+    # {
+    #   "text": "this is the text generated by a model."
+    # }
+    Tasks.text_generation: [OutputKeys.TEXT],
+    # text feature extraction for single sample
+    # {
+    #   "text_embedding": np.array with shape [1, D]
+    # }
+    Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING],
+    # fill mask result for single sample
+    # {
+    #   "text": "this is the text which masks filled by model."
+    # }
+    Tasks.fill_mask: [OutputKeys.TEXT],
+    # (Deprecated) dialog intent prediction result for single sample
     # {'pred': array([2.62349960e-03, 4.12110658e-03, 4.12748595e-05, 3.77560973e-05,
     #        1.08599677e-04, 1.72710388e-05, 2.95618793e-05, 1.93638436e-04,
     #        6.45841064e-05, 1.15997791e-04, 5.11605394e-05, 9.87020373e-01,
@@ -252,11 +315,11 @@ TASK_OUTPUTS = {
     Tasks.dialog_intent_prediction:
     [OutputKeys.PREDICTION, OutputKeys.LABEL_POS, OutputKeys.LABEL],
-    # dialog modeling prediction result for single sample
+    # (Deprecated) dialog modeling prediction result for single sample
     #   sys : ['you', 'are', 'welcome', '.', 'have', 'a', 'great', 'day', '!']
     Tasks.dialog_modeling: [OutputKeys.RESPONSE],
-    # dialog state tracking result for single sample
+    # (Deprecated) dialog state tracking result for single sample
     # {
     #     "dialog_states": {
     #         "taxi-leaveAt": "none",
@@ -294,6 +357,9 @@ TASK_OUTPUTS = {
     Tasks.dialog_state_tracking: [OutputKeys.DIALOG_STATES],
     # ============ audio tasks ===================
+    # asr result for single sample
+    # { "text": "每一天都要快乐喔"}
+    Tasks.auto_speech_recognition: [OutputKeys.TEXT],
     # audio processed for single file in PCM format
     # {
@@ -303,30 +369,19 @@ TASK_OUTPUTS = {
     Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM],
     Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM],
+    # text_to_speech result for a single sample
+    # {
+    #   "output_pcm": {"input_label" : np.ndarray with shape [D]}
+    # }
+    Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM],
     # ============ multi-modal tasks ===================
     # image caption result for single sample
     # {
     #   "caption": "this is an image caption text."
     # }
     Tasks.image_captioning: [OutputKeys.CAPTION],
-    # multi-modal embedding result for single sample
-    # {
-    #   "img_embedding": np.array with shape [1, D],
-    #   "text_embedding": np.array with shape [1, D]
-    # }
-    Tasks.multi_modal_embedding:
-    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING],
-    # generative multi-modal embedding result for single sample
-    # {
-    #   "img_embedding": np.array with shape [1, D],
-    #   "text_embedding": np.array with shape [1, D],
-    #   "caption": "this is an image caption text."
-    # }
-    Tasks.generative_multi_modal_embedding:
-    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION],
     # visual grounding result for single sample
     # {
@@ -350,25 +405,31 @@ TASK_OUTPUTS = {
-    #   "output_pcm": {"input_label" : np.ndarray with shape [D]}
-    # }
-    Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM],
-    # virtual_try_on result for a single sample
-    # {
-    #   "output_img": np.ndarray with shape [height, width, 3]
-    # }
-    Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG],
-    # visual_question_answering result for a single sample
-    # {
-    #   "text": "this is the text generated by a model."
-    # }
+    # multi-modal embedding result for single sample
+    # {
+    #   "img_embedding": np.array with shape [1, D],
+    #   "text_embedding": np.array with shape [1, D]
+    # }
+    Tasks.multi_modal_embedding:
+    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING],
+    # generative multi-modal embedding result for single sample
+    # {
+    #   "img_embedding": np.array with shape [1, D],
+    #   "text_embedding": np.array with shape [1, D],
+    #   "caption": "this is an image caption text."
+    # }
+    Tasks.generative_multi_modal_embedding:
+    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION],
+    # VQA result for a sample
+    # {"text": "this is a text answer. "}
     Tasks.visual_question_answering: [OutputKeys.TEXT],
-    # auto_speech_recognition result for a single sample
-    # {
-    #   "text": "每天都要快乐喔"
-    # }
-    Tasks.auto_speech_recognition: [OutputKeys.TEXT],
-    # text_error_correction result for a single sample
     # {
-    #   "output": "我想吃苹果"
+    #   "scores": [0.9, 0.1, 0.1],
+    #   "labels": ["entailment", "contradiction", "neutral"]
     # }
-    Tasks.text_error_correction: [OutputKeys.OUTPUT]
+    Tasks.visual_entailment: [OutputKeys.SCORES, OutputKeys.LABELS],
 }
```
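Since `TASK_OUTPUTS` now declares the expected keys per task, a check along these lines can validate a pipeline result against the table. This is a hedged sketch: `validate_output` is a hypothetical helper, not something this change adds.

```python
from modelscope.outputs import TASK_OUTPUTS


def validate_output(task: str, output: dict) -> None:
    """Raise if `output` lacks any key that TASK_OUTPUTS declares for `task`."""
    expected = TASK_OUTPUTS.get(task)
    if expected is None:
        raise ValueError(f'no output definition registered for task: {task}')
    missing = [key for key in expected if key not in output]
    if missing:
        raise ValueError(f'task {task} output is missing keys: {missing}')
```

For example, `validate_output(Tasks.image_denoising, result)` should pass only when the result carries `output_img`.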
```diff
@@ -39,8 +39,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                              'damo/cv_resnet18_human-detection'),
     Tasks.image_object_detection: (Pipelines.object_detection,
                                    'damo/cv_vit_object-detection_coco'),
-    Tasks.image_denoise: (Pipelines.image_denoise,
-                          'damo/cv_nafnet_image-denoise_sidd'),
+    Tasks.image_denoising: (Pipelines.image_denoise,
+                            'damo/cv_nafnet_image-denoise_sidd'),
     Tasks.text_classification: (Pipelines.sentiment_analysis,
                                 'damo/bert-base-sst2'),
     Tasks.text_generation: (Pipelines.text_generation,
@@ -94,8 +94,9 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.video_multi_modal_embedding:
     (Pipelines.video_multi_modal_embedding,
      'damo/multi_modal_clip_vtretrival_msrvtt_53'),
-    Tasks.image_color_enhance: (Pipelines.image_color_enhance,
-                                'damo/cv_csrnet_image-color-enhance-models'),
+    Tasks.image_color_enhancement:
+    (Pipelines.image_color_enhance,
+     'damo/cv_csrnet_image-color-enhance-models'),
     Tasks.virtual_try_on: (Pipelines.virtual_try_on,
                            'damo/cv_daflow_virtual-try-on_base'),
     Tasks.image_colorization: (Pipelines.image_colorization,
@@ -18,7 +18,7 @@ logger = get_logger()
 @PIPELINES.register_module(
-    Tasks.image_color_enhance, module_name=Pipelines.image_color_enhance)
+    Tasks.image_color_enhancement, module_name=Pipelines.image_color_enhance)
 class ImageColorEnhancePipeline(Pipeline):
     def __init__(self,
@@ -19,7 +19,7 @@ __all__ = ['ImageDenoisePipeline']
 @PIPELINES.register_module(
-    Tasks.image_denoise, module_name=Pipelines.image_denoise)
+    Tasks.image_denoising, module_name=Pipelines.image_denoise)
 class ImageDenoisePipeline(Pipeline):
     def __init__(self,
@@ -15,6 +15,8 @@ from modelscope.utils.logger import get_logger
 logger = get_logger()
+@PIPELINES.register_module(
+    Tasks.protrait_matting, module_name=Pipelines.image_matting)
 @PIPELINES.register_module(
     Tasks.image_matting, module_name=Pipelines.image_matting)
 class ImageMattingPipeline(Pipeline):
@@ -34,7 +34,8 @@ def save_grid(imgs, filename, nrow=5):
 @PIPELINES.register_module(
-    Tasks.image_generation, module_name=Pipelines.image2image_translation)
+    Tasks.image_to_image_translation,
+    module_name=Pipelines.image2image_translation)
 class Image2ImageTranslationPipeline(Pipeline):
     def __init__(self, model: str, **kwargs):
@@ -34,7 +34,8 @@ def is_official_hub_path(path: Union[str, List],
         try:
             _ = HubApi().get_model(path, revision=revision)
             return True
-        except Exception:
+        except Exception as e:
+            logger.warning(f'get model exception: {e}')
             return False
     if isinstance(path, str):
@@ -14,42 +14,60 @@ class Fields(object):
 class CVTasks(object):
-    # vision tasks
-    image_to_text = 'image-to-text'
-    pose_estimation = 'pose-estimation'
+    # ocr
+    ocr_detection = 'ocr-detection'
+    ocr_recognition = 'ocr-recognition'
+    # human face body related
+    face_detection = 'face-detection'
+    face_recognition = 'face-recognition'
+    human_detection = 'human-detection'
+    human_object_interaction = 'human-object-interaction'
+    face_image_generation = 'face-image-generation'
     image_classification = 'image-classification'
-    image_tagging = 'image-tagging'
-    object_detection = 'object-detection'
-    human_detection = 'human-detection'
+    image_multilabel_classification = 'image-multilabel-classification'
+    image_classification_imagenet = 'image-classification-imagenet'
+    image_classification_dailylife = 'image-classification-dailylife'
+    image_object_detection = 'image-object-detection'
     image_segmentation = 'image-segmentation'
-    image_editing = 'image-editing'
-    image_generation = 'image-generation'
     image_matting = 'image-matting'
-    image_denoise = 'image-denoise'
-    ocr_detection = 'ocr-detection'
-    action_recognition = 'action-recognition'
-    video_embedding = 'video-embedding'
-    face_detection = 'face-detection'
-    face_recognition = 'face-recognition'
-    image_color_enhance = 'image-color-enhance'
-    virtual_try_on = 'virtual-try-on'
+    protrait_matting = 'protrait-matting'
+    # image editing
+    image_protrait_enhancement = 'image-protrait-enhancement'
+    skin_retouching = 'skin-retouching'
+    image_super_resolution = 'image-super-resolution'
     image_colorization = 'image-colorization'
-    face_image_generation = 'face-image-generation'
-    image_super_resolution = 'image-super-resolution'
+    image_color_enhancement = 'image-color-enhancement'
+    image_denoising = 'image-denoising'
+    # image generation
+    image_to_image_translation = 'image-to-image-translation'
+    image_to_image_generation = 'image-to-image-generation'
+    image_style_transfer = 'image-style-transfer'
+    image_portrait_stylization = 'image-portrait-stylization'
     image_embedding = 'image-embedding'
     product_retrieval_embedding = 'product-retrieval-embedding'
+    # video recognition
     live_category = 'live-category'
+    action_recognition = 'action-recognition'
     video_category = 'video-category'
-    image_classification_imagenet = 'image-classification-imagenet'
-    image_classification_dailylife = 'image-classification-dailylife'
-    image_portrait_stylization = 'image-portrait-stylization'
-    image_to_image_generation = 'image-to-image-generation'
+    video_embedding = 'video-embedding'
+    virtual_try_on = 'virtual-try-on'
 class NLPTasks(object):
     # nlp tasks
     word_segmentation = 'word-segmentation'
+    part_of_speech = 'part-of-speech'
     named_entity_recognition = 'named-entity-recognition'
     nli = 'nli'
     sentiment_classification = 'sentiment-classification'
@@ -66,7 +84,7 @@ class NLPTasks(object):
     dialog_intent_prediction = 'dialog-intent-prediction'
     dialog_state_tracking = 'dialog-state-tracking'
     table_question_answering = 'table-question-answering'
-    feature_extraction = 'feature-extraction'
+    sentence_embedding = 'sentence-embedding'
     fill_mask = 'fill-mask'
     summarization = 'summarization'
     question_answering = 'question-answering'
```
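Taken together, the task renames in this change boil down to the following old-to-new mapping, derived from the hunks above. The dict is illustrative, not part of the library:

```python
# Old task string -> new task string, as implied by this diff.
RENAMED_TASKS = {
    'image-denoise': 'image-denoising',
    'image-color-enhance': 'image-color-enhancement',
    'object-detection': 'image-object-detection',
    'image-generation': 'image-to-image-translation',  # image2image pipeline
    'feature-extraction': 'sentence-embedding',
}
# Not strict renames: the matting pipeline additionally registers under
# 'protrait-matting' (sic), and the custom pipeline tests move from
# 'image-tagging' to 'image-classification'.
```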
```diff
@@ -21,7 +21,7 @@ logger = get_logger()
 @PIPELINES.register_module(
-    group_key=Tasks.image_tagging, module_name='custom_single_model')
+    group_key=Tasks.image_classification, module_name='custom_single_model')
 class CustomSingleModelPipeline(Pipeline):
     def __init__(self,
@@ -38,7 +38,7 @@ class CustomSingleModelPipeline(Pipeline):
 @PIPELINES.register_module(
-    group_key=Tasks.image_tagging, module_name='model1_model2')
+    group_key=Tasks.image_classification, module_name='model1_model2')
 class CustomMultiModelPipeline(Pipeline):
     def __init__(self,
@@ -64,7 +64,7 @@ class PipelineInterfaceTest(unittest.TestCase):
         cfg_file = os.path.join(dirname, ModelFile.CONFIGURATION)
         cfg = {
             ConfigFields.framework: Frameworks.torch,
-            ConfigFields.task: Tasks.image_tagging,
+            ConfigFields.task: Tasks.image_classification,
             ConfigFields.pipeline: {
                 'type': pipeline_name,
             }
@@ -77,12 +77,13 @@ class PipelineInterfaceTest(unittest.TestCase):
         self.prepare_dir('/tmp/model2', 'model1_model2')
     def test_single_model(self):
-        pipe = pipeline(Tasks.image_tagging, model='/tmp/custom_single_model')
+        pipe = pipeline(
+            Tasks.image_classification, model='/tmp/custom_single_model')
         assert isinstance(pipe, CustomSingleModelPipeline)
     def test_multi_model(self):
         pipe = pipeline(
-            Tasks.image_tagging, model=['/tmp/model1', '/tmp/model2'])
+            Tasks.image_classification, model=['/tmp/model1', '/tmp/model2'])
         assert isinstance(pipe, CustomMultiModelPipeline)
@@ -24,7 +24,7 @@ class Image2ImageTranslationTest(unittest.TestCase):
         just like the following code.
         """
         img2img_gen_pipeline = pipeline(
-            Tasks.image_generation,
+            Tasks.image_to_image_translation,
             model='damo/cv_latent_diffusion_image2image_translation')
         result = img2img_gen_pipeline(
             ('data/test/images/img2img_input_mask.png',
@@ -27,13 +27,13 @@ class ImageColorEnhanceTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_modelhub(self):
         img_color_enhance = pipeline(
-            Tasks.image_color_enhance, model=self.model_id)
+            Tasks.image_color_enhancement, model=self.model_id)
         self.pipeline_inference(img_color_enhance,
                                 'data/test/images/image_color_enhance.png')
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
-        img_color_enhance = pipeline(Tasks.image_color_enhance)
+        img_color_enhance = pipeline(Tasks.image_color_enhancement)
         self.pipeline_inference(img_color_enhance,
                                 'data/test/images/image_color_enhance.png')
@@ -30,7 +30,7 @@ class ImageDenoiseTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_from_modelhub(self):
         model = Model.from_pretrained(self.model_id)
-        pipeline_ins = pipeline(task=Tasks.image_denoise, model=model)
+        pipeline_ins = pipeline(task=Tasks.image_denoising, model=model)
         denoise_img = pipeline_ins(
             input=self.demo_image_path)[OutputKeys.OUTPUT_IMG]
         denoise_img = Image.fromarray(denoise_img)
@@ -39,7 +39,8 @@ class ImageDenoiseTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_model_name(self):
-        pipeline_ins = pipeline(task=Tasks.image_denoise, model=self.model_id)
+        pipeline_ins = pipeline(
+            task=Tasks.image_denoising, model=self.model_id)
         denoise_img = pipeline_ins(
             input=self.demo_image_path)[OutputKeys.OUTPUT_IMG]
         denoise_img = Image.fromarray(denoise_img)
@@ -48,7 +49,7 @@ class ImageDenoiseTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_default_model(self):
-        pipeline_ins = pipeline(task=Tasks.image_denoise)
+        pipeline_ins = pipeline(task=Tasks.image_denoising)
         denoise_img = pipeline_ins(
             input=self.demo_image_path)[OutputKeys.OUTPUT_IMG]
         denoise_img = Image.fromarray(denoise_img)
@@ -26,7 +26,7 @@ class ImageMattingTest(unittest.TestCase):
         model_file = osp.join(tmp_dir, ModelFile.TF_GRAPH_FILE)
         with open(model_file, 'wb') as ofile:
             ofile.write(File.read(model_path))
-        img_matting = pipeline(Tasks.image_matting, model=tmp_dir)
+        img_matting = pipeline(Tasks.protrait_matting, model=tmp_dir)
         result = img_matting('data/test/images/image_matting.png')
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
@@ -38,7 +38,7 @@ class ImageMattingTest(unittest.TestCase):
         # input_location = '/dir/to/images'
         dataset = MsDataset.load(input_location, target='image')
-        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+        img_matting = pipeline(Tasks.protrait_matting, model=self.model_id)
         # note that for dataset output, the inference-output is a Generator that can be iterated.
         result = img_matting(dataset)
         cv2.imwrite('result.png', next(result)[OutputKeys.OUTPUT_IMG])
@@ -46,7 +46,7 @@ class ImageMattingTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_modelhub(self):
-        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+        img_matting = pipeline(Tasks.protrait_matting, model=self.model_id)
         result = img_matting('data/test/images/image_matting.png')
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
@@ -54,7 +54,7 @@ class ImageMattingTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
-        img_matting = pipeline(Tasks.image_matting)
+        img_matting = pipeline(Tasks.protrait_matting)
         result = img_matting('data/test/images/image_matting.png')
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
@@ -67,7 +67,7 @@ class ImageMattingTest(unittest.TestCase):
             namespace='damotest',
             split='test',
             target='file')
-        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+        img_matting = pipeline(Tasks.protrait_matting, model=self.model_id)
         result = img_matting(dataset)
         for i in range(2):
             cv2.imwrite(f'result_{i}.png', next(result)[OutputKeys.OUTPUT_IMG])
@@ -42,12 +42,13 @@ class RegistryTest(unittest.TestCase):
             MODELS.get('Bert', Tasks.sentiment_analysis) is
             BertForSentimentAnalysis)
-        @MODELS.register_module(Tasks.object_detection)
+        @MODELS.register_module(Tasks.image_object_detection)
         class DETR(object):
             pass
-        self.assertTrue(Tasks.object_detection in MODELS.modules)
-        self.assertTrue(MODELS.get('DETR', Tasks.object_detection) is DETR)
+        self.assertTrue(Tasks.image_object_detection in MODELS.modules)
+        self.assertTrue(
+            MODELS.get('DETR', Tasks.image_object_detection) is DETR)
         self.assertEqual(len(MODELS.modules), 4)
```
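As an end-to-end sanity check of what the registry test above asserts, something like the following should hold after the change. This is a sketch mirroring the test: `ToyDetector` and the 'toy' module name are made up, and the registry import path is assumed from the repository layout.

```python
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks


# Register a stand-in model under the renamed detection task key...
@MODELS.register_module(Tasks.image_object_detection, module_name='toy')
class ToyDetector:
    pass


# ...then resolve it back through the same group key, as the test does.
assert Tasks.image_object_detection in MODELS.modules
assert MODELS.get('toy', Tasks.image_object_detection) is ToyDetector
```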