From 7798a6250a9045f9239e6646f49411cd11f40708 Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm"
Date: Tue, 2 Aug 2022 20:21:05 +0800
Subject: [PATCH] [to #43112692] standardized task names and outputs

1. task name and output definitions: [link](https://alidocs.dingtalk.com/i/nodes/KOEmgBoGwD78vd2bAry3VndLerP9b30a?nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA&iframeQuery=utm_source%3Dportal%26utm_medium%3Dportal_space_file_tree)
2. rearrange task definitions and add more output definitions for tasks

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9585469
---
 modelscope/metrics/builder.py | 4 +-
 .../image_color_enhance.py | 3 +-
 .../image_denoise/nafnet_for_image_denoise.py | 2 +-
 modelscope/outputs.py | 303 +++++++++++-------
 modelscope/pipelines/builder.py | 9 +-
 .../cv/image_color_enhance_pipeline.py | 2 +-
 .../pipelines/cv/image_denoise_pipeline.py | 2 +-
 .../pipelines/cv/image_matting_pipeline.py | 2 +
 .../cv/image_to_image_translation_pipeline.py | 3 +-
 modelscope/pipelines/util.py | 3 +-
 modelscope/utils/constant.py | 64 ++--
 tests/pipelines/test_builder.py | 11 +-
 .../pipelines/test_image2image_translation.py | 2 +-
 tests/pipelines/test_image_color_enhance.py | 4 +-
 tests/pipelines/test_image_denoise.py | 7 +-
 tests/pipelines/test_image_matting.py | 10 +-
 tests/utils/test_registry.py | 7 +-
 17 files changed, 263 insertions(+), 175 deletions(-)

diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py
index ab837ff0..5b9f962e 100644
--- a/modelscope/metrics/builder.py
+++ b/modelscope/metrics/builder.py
@@ -22,8 +22,8 @@ task_default_metrics = {
     Tasks.sentence_similarity: [Metrics.seq_cls_metric],
     Tasks.sentiment_classification: [Metrics.seq_cls_metric],
     Tasks.text_generation: [Metrics.text_gen_metric],
-    Tasks.image_denoise: [Metrics.image_denoise_metric],
-    Tasks.image_color_enhance: [Metrics.image_color_enhance_metric]
+    Tasks.image_denoising: [Metrics.image_denoise_metric],
+    Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric]
 }
 
 
diff --git a/modelscope/models/cv/image_color_enhance/image_color_enhance.py b/modelscope/models/cv/image_color_enhance/image_color_enhance.py
index d142e682..382cc152 100644
--- a/modelscope/models/cv/image_color_enhance/image_color_enhance.py
+++ b/modelscope/models/cv/image_color_enhance/image_color_enhance.py
@@ -17,7 +17,8 @@ logger = get_logger()
 
 __all__ = ['ImageColorEnhance']
 
-@MODELS.register_module(Tasks.image_color_enhance, module_name=Models.csrnet)
+@MODELS.register_module(
+    Tasks.image_color_enhancement, module_name=Models.csrnet)
 class ImageColorEnhance(TorchModel):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py b/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py
index 35f0eb5a..eaf5d0c5 100644
--- a/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py
+++ b/modelscope/models/cv/image_denoise/nafnet_for_image_denoise.py
@@ -19,7 +19,7 @@ logger = get_logger()
 
 __all__ = ['NAFNetForImageDenoise']
 
-@MODELS.register_module(Tasks.image_denoise, module_name=Models.nafnet)
+@MODELS.register_module(Tasks.image_denoising, module_name=Models.nafnet)
 class NAFNetForImageDenoise(TorchModel):
 
     def __init__(self, model_dir: str, *args, **kwargs):
diff --git a/modelscope/outputs.py b/modelscope/outputs.py
index c28f2fb9..20254416 100644
--- a/modelscope/outputs.py
+++ b/modelscope/outputs.py
@@ -21,6 +21,7 @@ class OutputKeys(object):
     OUTPUT_IMG = 'output_img'
     OUTPUT_PCM = 'output_pcm'
     IMG_EMBEDDING = 
'img_embedding' + SPO_LIST = 'spo_list' TEXT_EMBEDDING = 'text_embedding' TRANSLATION = 'translation' RESPONSE = 'response' @@ -29,32 +30,21 @@ class OutputKeys(object): PROBABILITIES = 'probabilities' DIALOG_STATES = 'dialog_states' VIDEO_EMBEDDING = 'video_embedding' + UUID = 'uuid' + WORD = 'word' + KWS_LIST = 'kws_list' TASK_OUTPUTS = { # ============ vision tasks =================== - # image classification result for single sample - # { - # "scores": [0.9, 0.1, 0.05, 0.05] - # "labels": ["dog", "horse", "cow", "cat"], - # } - Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS], - Tasks.image_tagging: [OutputKeys.SCORES, OutputKeys.LABELS], - - # object detection result for single sample - # { - # "scores": [0.9, 0.1, 0.05, 0.05] - # "labels": ["dog", "horse", "cow", "cat"], - # "boxes": [ - # [x1, y1, x2, y2], - # [x1, y1, x2, y2], - # [x1, y1, x2, y2], - # ], - # } - Tasks.object_detection: - [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + # ocr detection result for single sample + # { + # "polygons": np.array with shape [num_text, 8], each polygon is + # [x1, y1, x2, y2, x3, y3, x4, y4] + # } + Tasks.ocr_detection: [OutputKeys.POLYGONS], # face detection result for single sample # { @@ -81,35 +71,79 @@ TASK_OUTPUTS = { # } Tasks.face_recognition: [OutputKeys.IMG_EMBEDDING], + # human detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["person", "person", "person", "person"], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # } + # + Tasks.human_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + + # face generation result for single sample + # { + # "output_img": np.array with shape(h, w, 3) + # } + Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG], + + # image classification result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # } + Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS], + + # object detection result for single sample + # { + # "scores": [0.9, 0.1, 0.05, 0.05] + # "labels": ["dog", "horse", "cow", "cat"], + # "boxes": [ + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # [x1, y1, x2, y2], + # ], + # } + Tasks.image_object_detection: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + # instance segmentation result for single sample # { # "scores": [0.9, 0.1, 0.05, 0.05], # "labels": ["dog", "horse", "cow", "cat"], - # "boxes": [ - # np.array in bgr channel order + # "masks": [ + # np.array # 2D array containing only 0, 1 # ] # } Tasks.image_segmentation: - [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS], - # image generation/editing/matting result for single sample + # image matting result for single sample # { # "output_img": np.array with shape(h, w, 4) - # for matting or (h, w, 3) for general purpose # } - Tasks.image_editing: [OutputKeys.OUTPUT_IMG], Tasks.image_matting: [OutputKeys.OUTPUT_IMG], - Tasks.image_generation: [OutputKeys.OUTPUT_IMG], - Tasks.image_denoise: [OutputKeys.OUTPUT_IMG], - Tasks.image_colorization: [OutputKeys.OUTPUT_IMG], - Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG], + Tasks.protrait_matting: [OutputKeys.OUTPUT_IMG], + + # image editing task result for a single image + # {"output_img": np.array with shape (h, w, 3)} + Tasks.image_protrait_enhancement: [OutputKeys.OUTPUT_IMG], + Tasks.skin_retouching: [OutputKeys.OUTPUT_IMG], 
Tasks.image_super_resolution: [OutputKeys.OUTPUT_IMG], + Tasks.image_colorization: [OutputKeys.OUTPUT_IMG], + Tasks.image_color_enhancement: [OutputKeys.OUTPUT_IMG], + Tasks.image_denoising: [OutputKeys.OUTPUT_IMG], - # action recognition result for single video - # { - # "output_label": "abseiling" - # } - Tasks.action_recognition: [OutputKeys.LABELS], + # image generation task result for a single image + # {"output_img": np.array with shape (h, w, 3)} + Tasks.image_to_image_generation: [OutputKeys.OUTPUT_IMG], + Tasks.image_to_image_translation: [OutputKeys.OUTPUT_IMG], + Tasks.image_style_transfer: [OutputKeys.OUTPUT_IMG], + Tasks.image_portrait_stylization: [OutputKeys.OUTPUT_IMG], # live category recognition result for single video # { @@ -117,28 +151,19 @@ TASK_OUTPUTS = { # "labels": ['女装/女士精品>>棉衣/棉服', '女装/女士精品>>牛仔裤', '女装/女士精品>>裤子>>休闲裤'], # } Tasks.live_category: [OutputKeys.SCORES, OutputKeys.LABELS], - # video category recognition result for single video - # { - # "scores": [0.7716429233551025] - # "labels": ['生活>>好物推荐'], - # } - Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS], - # pose estimation result for single sample + # action recognition result for single video # { - # "poses": np.array with shape [num_pose, num_keypoint, 3], - # each keypoint is a array [x, y, score] - # "boxes": np.array with shape [num_pose, 4], each box is - # [x1, y1, x2, y2] + # "output_label": "abseiling" # } - Tasks.pose_estimation: [OutputKeys.POSES, OutputKeys.BOXES], + Tasks.action_recognition: [OutputKeys.LABELS], - # ocr detection result for single sample + # video category recognition result for single video # { - # "polygons": np.array with shape [num_text, 8], each polygon is - # [x1, y1, x2, y2, x3, y3, x4, y4] + # "scores": [0.7716429233551025] + # "labels": ['生活>>好物推荐'], # } - Tasks.ocr_detection: [OutputKeys.POLYGONS], + Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS], # image embedding result for a single image # { @@ -152,11 +177,11 @@ TASK_OUTPUTS = { # } Tasks.video_embedding: [OutputKeys.VIDEO_EMBEDDING], - # image_color_enhance result for a single sample + # virtual_try_on result for a single sample # { - # "output_img": np.ndarray with shape [height, width, 3], uint8 + # "output_img": np.ndarray with shape [height, width, 3] # } - Tasks.image_color_enhance: [OutputKeys.OUTPUT_IMG], + Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG], # ============ nlp tasks =================== @@ -167,33 +192,6 @@ TASK_OUTPUTS = { # } Tasks.text_classification: [OutputKeys.SCORES, OutputKeys.LABELS], - # text generation result for single sample - # { - # "text": "this is the text generated by a model." - # } - Tasks.text_generation: [OutputKeys.TEXT], - - # fill mask result for single sample - # { - # "text": "this is the text which masks filled by model." 
- # } - Tasks.fill_mask: [OutputKeys.TEXT], - - # word segmentation result for single sample - # { - # "output": "今天 天气 不错 , 适合 出去 游玩" - # } - Tasks.word_segmentation: [OutputKeys.OUTPUT], - - # named entity recognition result for single sample - # { - # "output": [ - # {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"}, - # {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"} - # ] - # } - Tasks.named_entity_recognition: [OutputKeys.OUTPUT], - # sentence similarity result for single sample # { # "scores": 0.9 @@ -201,11 +199,12 @@ TASK_OUTPUTS = { # } Tasks.sentence_similarity: [OutputKeys.SCORES, OutputKeys.LABELS], - # translation result for a source sentence + # nli result for single sample # { - # "translation": “北京是中国的首都” + # "labels": ["happy", "sad", "calm", "angry"], + # "scores": [0.9, 0.1, 0.05, 0.05] # } - Tasks.translation: [OutputKeys.TRANSLATION], + Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS], # sentiment classification result for single sample # { @@ -221,14 +220,78 @@ TASK_OUTPUTS = { # } Tasks.zero_shot_classification: [OutputKeys.SCORES, OutputKeys.LABELS], - # nli result for single sample + # relation extraction result for a single sample + # { + # "uuid": "人生信息-1", + # "text": "《父老乡亲》是由是由由中国人民解放军海政文工团创作的军旅歌曲,石顺义作词,王锡仁作曲,范琳琳演唱", + # "spo_list": [{"subject": "石顺义", "predicate": "国籍", "object": "中国"}] + # } + Tasks.relation_extraction: + [OutputKeys.UUID, OutputKeys.TEXT, OutputKeys.SPO_LIST], + + # translation result for a source sentence # { - # "labels": ["happy", "sad", "calm", "angry"], - # "scores": [0.9, 0.1, 0.05, 0.05] + # "translation": “北京是中国的首都” # } - Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS], + Tasks.translation: [OutputKeys.TRANSLATION], + + # word segmentation result for single sample + # { + # "output": "今天 天气 不错 , 适合 出去 游玩" + # } + Tasks.word_segmentation: [OutputKeys.OUTPUT], + + # part-of-speech result for single sample + # [ + # {'word': '诸葛', 'label': 'PROPN'}, + # {'word': '亮', 'label': 'PROPN'}, + # {'word': '发明', 'label': 'VERB'}, + # {'word': '八', 'label': 'NUM'}, + # {'word': '阵', 'label': 'NOUN'}, + # {'word': '图', 'label': 'PART'}, + # {'word': '以', 'label': 'ADV'}, + # {'word': '利', 'label': 'VERB'}, + # {'word': '立营', 'label': 'VERB'}, + # {'word': '练兵', 'label': 'VERB'}, + # {'word': '.', 'label': 'PUNCT'} + # ] + # TODO @wenmeng.zwm support list of result check + Tasks.part_of_speech: [OutputKeys.WORD, OutputKeys.LABEL], + + # named entity recognition result for single sample + # { + # "output": [ + # {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"}, + # {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"} + # ] + # } + Tasks.named_entity_recognition: [OutputKeys.OUTPUT], - # dialog intent prediction result for single sample + # text_error_correction result for a single sample + # { + # "output": "我想吃苹果" + # } + Tasks.text_error_correction: [OutputKeys.OUTPUT], + + # text generation result for single sample + # { + # "text": "this is the text generated by a model." + # } + Tasks.text_generation: [OutputKeys.TEXT], + + # text feature extraction for single sample + # { + # "text_embedding": np.array with shape [1, D] + # } + Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING], + + # fill mask result for single sample + # { + # "text": "this is the text which masks filled by model." 
+ # } + Tasks.fill_mask: [OutputKeys.TEXT], + + # (Deprecated) dialog intent prediction result for single sample # {'pred': array([2.62349960e-03, 4.12110658e-03, 4.12748595e-05, 3.77560973e-05, # 1.08599677e-04, 1.72710388e-05, 2.95618793e-05, 1.93638436e-04, # 6.45841064e-05, 1.15997791e-04, 5.11605394e-05, 9.87020373e-01, @@ -252,11 +315,11 @@ TASK_OUTPUTS = { Tasks.dialog_intent_prediction: [OutputKeys.PREDICTION, OutputKeys.LABEL_POS, OutputKeys.LABEL], - # dialog modeling prediction result for single sample + # (Deprecated) dialog modeling prediction result for single sample # sys : ['you', 'are', 'welcome', '.', 'have', 'a', 'great', 'day', '!'] Tasks.dialog_modeling: [OutputKeys.RESPONSE], - # dialog state tracking result for single sample + # (Deprecated) dialog state tracking result for single sample # { # "dialog_states": { # "taxi-leaveAt": "none", @@ -294,6 +357,9 @@ TASK_OUTPUTS = { Tasks.dialog_state_tracking: [OutputKeys.DIALOG_STATES], # ============ audio tasks =================== + # asr result for single sample + # { "text": "每一天都要快乐喔"} + Tasks.auto_speech_recognition: [OutputKeys.TEXT], # audio processed for single file in PCM format # { @@ -303,30 +369,19 @@ TASK_OUTPUTS = { Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM], Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM], - # ============ multi-modal tasks =================== - - # image caption result for single sample + # text_to_speech result for a single sample # { - # "caption": "this is an image caption text." + # "output_pcm": {"input_label" : np.ndarray with shape [D]} # } - Tasks.image_captioning: [OutputKeys.CAPTION], + Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM], - # multi-modal embedding result for single sample - # { - # "img_embedding": np.array with shape [1, D], - # "text_embedding": np.array with shape [1, D] - # } - Tasks.multi_modal_embedding: - [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING], + # ============ multi-modal tasks =================== - # generative multi-modal embedding result for single sample + # image caption result for single sample # { - # "img_embedding": np.array with shape [1, D], - # "text_embedding": np.array with shape [1, D], # "caption": "this is an image caption text." # } - Tasks.generative_multi_modal_embedding: - [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION], + Tasks.image_captioning: [OutputKeys.CAPTION], # visual grounding result for single sample # { @@ -350,25 +405,31 @@ TASK_OUTPUTS = { # "output_pcm": {"input_label" : np.ndarray with shape [D]} # } Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM], - # virtual_try_on result for a single sample + + # multi-modal embedding result for single sample # { - # "output_img": np.ndarray with shape [height, width, 3] + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D] # } - Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG], - # visual_question_answering result for a single sample + Tasks.multi_modal_embedding: + [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING], + + # generative multi-modal embedding result for single sample # { - # "text": "this is the text generated by a model." + # "img_embedding": np.array with shape [1, D], + # "text_embedding": np.array with shape [1, D], + # "caption": "this is an image caption text." # } + Tasks.generative_multi_modal_embedding: + [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION], + + # VQA result for a sample + # {"text": "this is a text answser. 
"} Tasks.visual_question_answering: [OutputKeys.TEXT], - # auto_speech_recognition result for a single sample - # { - # "text": "每天都要快乐喔" - # } - Tasks.auto_speech_recognition: [OutputKeys.TEXT], - # text_error_correction result for a single sample # { - # "output": "我想吃苹果" + # "scores": [0.9, 0.1, 0.1], + # "labels": ["entailment", "contradiction", "neutral"] # } - Tasks.text_error_correction: [OutputKeys.OUTPUT] + Tasks.visual_entailment: [OutputKeys.SCORES, OutputKeys.LABELS], } diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index d5cfba3d..4cf1924b 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -39,8 +39,8 @@ DEFAULT_MODEL_FOR_PIPELINE = { 'damo/cv_resnet18_human-detection'), Tasks.image_object_detection: (Pipelines.object_detection, 'damo/cv_vit_object-detection_coco'), - Tasks.image_denoise: (Pipelines.image_denoise, - 'damo/cv_nafnet_image-denoise_sidd'), + Tasks.image_denoising: (Pipelines.image_denoise, + 'damo/cv_nafnet_image-denoise_sidd'), Tasks.text_classification: (Pipelines.sentiment_analysis, 'damo/bert-base-sst2'), Tasks.text_generation: (Pipelines.text_generation, @@ -94,8 +94,9 @@ DEFAULT_MODEL_FOR_PIPELINE = { Tasks.video_multi_modal_embedding: (Pipelines.video_multi_modal_embedding, 'damo/multi_modal_clip_vtretrival_msrvtt_53'), - Tasks.image_color_enhance: (Pipelines.image_color_enhance, - 'damo/cv_csrnet_image-color-enhance-models'), + Tasks.image_color_enhancement: + (Pipelines.image_color_enhance, + 'damo/cv_csrnet_image-color-enhance-models'), Tasks.virtual_try_on: (Pipelines.virtual_try_on, 'damo/cv_daflow_virtual-try-on_base'), Tasks.image_colorization: (Pipelines.image_colorization, diff --git a/modelscope/pipelines/cv/image_color_enhance_pipeline.py b/modelscope/pipelines/cv/image_color_enhance_pipeline.py index b9007f77..40777d60 100644 --- a/modelscope/pipelines/cv/image_color_enhance_pipeline.py +++ b/modelscope/pipelines/cv/image_color_enhance_pipeline.py @@ -18,7 +18,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.image_color_enhance, module_name=Pipelines.image_color_enhance) + Tasks.image_color_enhancement, module_name=Pipelines.image_color_enhance) class ImageColorEnhancePipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/cv/image_denoise_pipeline.py b/modelscope/pipelines/cv/image_denoise_pipeline.py index 0c6c878a..64aa3bc9 100644 --- a/modelscope/pipelines/cv/image_denoise_pipeline.py +++ b/modelscope/pipelines/cv/image_denoise_pipeline.py @@ -19,7 +19,7 @@ __all__ = ['ImageDenoisePipeline'] @PIPELINES.register_module( - Tasks.image_denoise, module_name=Pipelines.image_denoise) + Tasks.image_denoising, module_name=Pipelines.image_denoise) class ImageDenoisePipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/cv/image_matting_pipeline.py b/modelscope/pipelines/cv/image_matting_pipeline.py index 2faaec37..f440440d 100644 --- a/modelscope/pipelines/cv/image_matting_pipeline.py +++ b/modelscope/pipelines/cv/image_matting_pipeline.py @@ -15,6 +15,8 @@ from modelscope.utils.logger import get_logger logger = get_logger() +@PIPELINES.register_module( + Tasks.protrait_matting, module_name=Pipelines.image_matting) @PIPELINES.register_module( Tasks.image_matting, module_name=Pipelines.image_matting) class ImageMattingPipeline(Pipeline): diff --git a/modelscope/pipelines/cv/image_to_image_translation_pipeline.py b/modelscope/pipelines/cv/image_to_image_translation_pipeline.py index a9f83e02..78901c9b 100644 --- 
a/modelscope/pipelines/cv/image_to_image_translation_pipeline.py +++ b/modelscope/pipelines/cv/image_to_image_translation_pipeline.py @@ -34,7 +34,8 @@ def save_grid(imgs, filename, nrow=5): @PIPELINES.register_module( - Tasks.image_generation, module_name=Pipelines.image2image_translation) + Tasks.image_to_image_translation, + module_name=Pipelines.image2image_translation) class Image2ImageTranslationPipeline(Pipeline): def __init__(self, model: str, **kwargs): diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py index 03383bc1..2c2c7751 100644 --- a/modelscope/pipelines/util.py +++ b/modelscope/pipelines/util.py @@ -34,7 +34,8 @@ def is_official_hub_path(path: Union[str, List], try: _ = HubApi().get_model(path, revision=revision) return True - except Exception: + except Exception as e: + logger.warning(f'get model exception: {e}') return False if isinstance(path, str): diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 73c55152..20311fba 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -14,42 +14,60 @@ class Fields(object): class CVTasks(object): - # vision tasks - image_to_text = 'image-to-text' - pose_estimation = 'pose-estimation' + # ocr + ocr_detection = 'ocr-detection' + ocr_recognition = 'ocr-recognition' + + # human face body related + face_detection = 'face-detection' + face_recognition = 'face-recognition' + human_detection = 'human-detection' + human_object_interaction = 'human-object-interaction' + face_image_generation = 'face-image-generation' + image_classification = 'image-classification' - image_tagging = 'image-tagging' - object_detection = 'object-detection' + image_multilabel_classification = 'image-multilabel-classification' + image_classification_imagenet = 'image-classification-imagenet' + image_classification_dailylife = 'image-classification-dailylife' + image_object_detection = 'image-object-detection' - human_detection = 'human-detection' + image_segmentation = 'image-segmentation' - image_editing = 'image-editing' - image_generation = 'image-generation' image_matting = 'image-matting' - image_denoise = 'image-denoise' - ocr_detection = 'ocr-detection' - action_recognition = 'action-recognition' - video_embedding = 'video-embedding' - face_detection = 'face-detection' - face_recognition = 'face-recognition' - image_color_enhance = 'image-color-enhance' - virtual_try_on = 'virtual-try-on' + protrait_matting = 'protrait-matting' + + # image editting + image_protrait_enhancement = 'image-protrait-enhancement' + skin_retouching = 'skin-retouching' + image_super_resolution = 'image-super-resolution' image_colorization = 'image-colorization' - face_image_generation = 'face-image-generation' + image_color_enhancement = 'image-color-enhancement' + image_denoising = 'image-denoising' + + # image generation + image_to_image_translation = 'image-to-image-translation' + image_to_image_generation = 'image-to-image-generation' image_style_transfer = 'image-style-transfer' - image_super_resolution = 'image-super-resolution' + image_portrait_stylization = 'image-portrait-stylization' + + image_embedding = 'image-embedding' + product_retrieval_embedding = 'product-retrieval-embedding' + + # video recognition live_category = 'live-category' + action_recognition = 'action-recognition' video_category = 'video-category' - image_classification_imagenet = 'image-classification-imagenet' - image_classification_dailylife = 'image-classification-dailylife' - image_portrait_stylization = 
'image-portrait-stylization' - image_to_image_generation = 'image-to-image-generation' + + video_embedding = 'video-embedding' + + virtual_try_on = 'virtual-try-on' class NLPTasks(object): # nlp tasks word_segmentation = 'word-segmentation' + part_of_speech = 'part-of-speech' named_entity_recognition = 'named-entity-recognition' nli = 'nli' sentiment_classification = 'sentiment-classification' @@ -66,7 +84,7 @@ class NLPTasks(object): dialog_intent_prediction = 'dialog-intent-prediction' dialog_state_tracking = 'dialog-state-tracking' table_question_answering = 'table-question-answering' - feature_extraction = 'feature-extraction' + sentence_embedding = 'sentence-embedding' fill_mask = 'fill-mask' summarization = 'summarization' question_answering = 'question-answering' diff --git a/tests/pipelines/test_builder.py b/tests/pipelines/test_builder.py index a91a7391..baef5a6f 100644 --- a/tests/pipelines/test_builder.py +++ b/tests/pipelines/test_builder.py @@ -21,7 +21,7 @@ logger = get_logger() @PIPELINES.register_module( - group_key=Tasks.image_tagging, module_name='custom_single_model') + group_key=Tasks.image_classification, module_name='custom_single_model') class CustomSingleModelPipeline(Pipeline): def __init__(self, @@ -38,7 +38,7 @@ class CustomSingleModelPipeline(Pipeline): @PIPELINES.register_module( - group_key=Tasks.image_tagging, module_name='model1_model2') + group_key=Tasks.image_classification, module_name='model1_model2') class CustomMultiModelPipeline(Pipeline): def __init__(self, @@ -64,7 +64,7 @@ class PipelineInterfaceTest(unittest.TestCase): cfg_file = os.path.join(dirname, ModelFile.CONFIGURATION) cfg = { ConfigFields.framework: Frameworks.torch, - ConfigFields.task: Tasks.image_tagging, + ConfigFields.task: Tasks.image_classification, ConfigFields.pipeline: { 'type': pipeline_name, } @@ -77,12 +77,13 @@ class PipelineInterfaceTest(unittest.TestCase): self.prepare_dir('/tmp/model2', 'model1_model2') def test_single_model(self): - pipe = pipeline(Tasks.image_tagging, model='/tmp/custom_single_model') + pipe = pipeline( + Tasks.image_classification, model='/tmp/custom_single_model') assert isinstance(pipe, CustomSingleModelPipeline) def test_multi_model(self): pipe = pipeline( - Tasks.image_tagging, model=['/tmp/model1', '/tmp/model2']) + Tasks.image_classification, model=['/tmp/model1', '/tmp/model2']) assert isinstance(pipe, CustomMultiModelPipeline) diff --git a/tests/pipelines/test_image2image_translation.py b/tests/pipelines/test_image2image_translation.py index 24766d25..8380af75 100644 --- a/tests/pipelines/test_image2image_translation.py +++ b/tests/pipelines/test_image2image_translation.py @@ -24,7 +24,7 @@ class Image2ImageTranslationTest(unittest.TestCase): just like the following code. 
""" img2img_gen_pipeline = pipeline( - Tasks.image_generation, + Tasks.image_to_image_translation, model='damo/cv_latent_diffusion_image2image_translation') result = img2img_gen_pipeline( ('data/test/images/img2img_input_mask.png', diff --git a/tests/pipelines/test_image_color_enhance.py b/tests/pipelines/test_image_color_enhance.py index ae22d65e..62ffbcb9 100644 --- a/tests/pipelines/test_image_color_enhance.py +++ b/tests/pipelines/test_image_color_enhance.py @@ -27,13 +27,13 @@ class ImageColorEnhanceTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_modelhub(self): img_color_enhance = pipeline( - Tasks.image_color_enhance, model=self.model_id) + Tasks.image_color_enhancement, model=self.model_id) self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub_default_model(self): - img_color_enhance = pipeline(Tasks.image_color_enhance) + img_color_enhance = pipeline(Tasks.image_color_enhancement) self.pipeline_inference(img_color_enhance, 'data/test/images/image_color_enhance.png') diff --git a/tests/pipelines/test_image_denoise.py b/tests/pipelines/test_image_denoise.py index b53f2e42..d3e0af24 100644 --- a/tests/pipelines/test_image_denoise.py +++ b/tests/pipelines/test_image_denoise.py @@ -30,7 +30,7 @@ class ImageDenoiseTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) - pipeline_ins = pipeline(task=Tasks.image_denoise, model=model) + pipeline_ins = pipeline(task=Tasks.image_denoising, model=model) denoise_img = pipeline_ins( input=self.demo_image_path)[OutputKeys.OUTPUT_IMG] denoise_img = Image.fromarray(denoise_img) @@ -39,7 +39,8 @@ class ImageDenoiseTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline(task=Tasks.image_denoise, model=self.model_id) + pipeline_ins = pipeline( + task=Tasks.image_denoising, model=self.model_id) denoise_img = pipeline_ins( input=self.demo_image_path)[OutputKeys.OUTPUT_IMG] denoise_img = Image.fromarray(denoise_img) @@ -48,7 +49,7 @@ class ImageDenoiseTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): - pipeline_ins = pipeline(task=Tasks.image_denoise) + pipeline_ins = pipeline(task=Tasks.image_denoising) denoise_img = pipeline_ins( input=self.demo_image_path)[OutputKeys.OUTPUT_IMG] denoise_img = Image.fromarray(denoise_img) diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index af8ace50..c5309978 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -26,7 +26,7 @@ class ImageMattingTest(unittest.TestCase): model_file = osp.join(tmp_dir, ModelFile.TF_GRAPH_FILE) with open(model_file, 'wb') as ofile: ofile.write(File.read(model_path)) - img_matting = pipeline(Tasks.image_matting, model=tmp_dir) + img_matting = pipeline(Tasks.protrait_matting, model=tmp_dir) result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) @@ -38,7 +38,7 @@ class ImageMattingTest(unittest.TestCase): # input_location = '/dir/to/images' dataset = MsDataset.load(input_location, target='image') - img_matting = 
pipeline(Tasks.image_matting, model=self.model_id) + img_matting = pipeline(Tasks.protrait_matting, model=self.model_id) # note that for dataset output, the inference-output is a Generator that can be iterated. result = img_matting(dataset) cv2.imwrite('result.png', next(result)[OutputKeys.OUTPUT_IMG]) @@ -46,7 +46,7 @@ class ImageMattingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_modelhub(self): - img_matting = pipeline(Tasks.image_matting, model=self.model_id) + img_matting = pipeline(Tasks.protrait_matting, model=self.model_id) result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) @@ -54,7 +54,7 @@ class ImageMattingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_modelhub_default_model(self): - img_matting = pipeline(Tasks.image_matting) + img_matting = pipeline(Tasks.protrait_matting) result = img_matting('data/test/images/image_matting.png') cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) @@ -67,7 +67,7 @@ class ImageMattingTest(unittest.TestCase): namespace='damotest', split='test', target='file') - img_matting = pipeline(Tasks.image_matting, model=self.model_id) + img_matting = pipeline(Tasks.protrait_matting, model=self.model_id) result = img_matting(dataset) for i in range(2): cv2.imwrite(f'result_{i}.png', next(result)[OutputKeys.OUTPUT_IMG]) diff --git a/tests/utils/test_registry.py b/tests/utils/test_registry.py index 67e44f4e..0a37101d 100644 --- a/tests/utils/test_registry.py +++ b/tests/utils/test_registry.py @@ -42,12 +42,13 @@ class RegistryTest(unittest.TestCase): MODELS.get('Bert', Tasks.sentiment_analysis) is BertForSentimentAnalysis) - @MODELS.register_module(Tasks.object_detection) + @MODELS.register_module(Tasks.image_object_detection) class DETR(object): pass - self.assertTrue(Tasks.object_detection in MODELS.modules) - self.assertTrue(MODELS.get('DETR', Tasks.object_detection) is DETR) + self.assertTrue(Tasks.image_object_detection in MODELS.modules) + self.assertTrue( + MODELS.get('DETR', Tasks.image_object_detection) is DETR) self.assertEqual(len(MODELS.modules), 4)
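
For reference, a minimal usage sketch of the renamed task constants and standardized output keys (assumptions: a ModelScope installation with this patch applied; the model ID is the default registered for Tasks.image_denoising in pipelines/builder.py above; the input path is a placeholder):

```python
# Minimal sketch, not part of the patch: exercises the renamed
# Tasks.image_denoising entry and the standardized OutputKeys.
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Default model for image denoising as registered in pipelines/builder.py.
denoise = pipeline(
    Tasks.image_denoising, model='damo/cv_nafnet_image-denoise_sidd')

# 'path/to/noisy.png' is a placeholder input image path.
result = denoise('path/to/noisy.png')

# Image editing tasks in TASK_OUTPUTS return an 'output_img' ndarray.
print(result[OutputKeys.OUTPUT_IMG].shape)
```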