1. Task names and output definitions: [link](https://alidocs.dingtalk.com/i/nodes/KOEmgBoGwD78vd2bAry3VndLerP9b30a?nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA&iframeQuery=utm_source%3Dportal%26utm_medium%3Dportal_space_file_tree)
2. Rearrange the task definitions and add more output definitions for tasks. Code review: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9585469master
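For reviewers, a minimal sketch of what the renames mean at call sites. The task constants, model ids, and the test image path are taken from the diff below; the import paths are the library's usual ones, and the denoise input path is a placeholder:

```python
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Old: Tasks.image_denoise / Tasks.image_color_enhance
denoise = pipeline(
    task=Tasks.image_denoising, model='damo/cv_nafnet_image-denoise_sidd')
enhance = pipeline(
    task=Tasks.image_color_enhancement,
    model='damo/cv_csrnet_image-color-enhance-models')

# Both tasks return the processed image under OutputKeys.OUTPUT_IMG.
denoised = denoise('data/test/images/noisy.png')[OutputKeys.OUTPUT_IMG]  # placeholder path
enhanced = enhance('data/test/images/image_color_enhance.png')[OutputKeys.OUTPUT_IMG]
```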
```diff
@@ -22,8 +22,8 @@ task_default_metrics = {
     Tasks.sentence_similarity: [Metrics.seq_cls_metric],
     Tasks.sentiment_classification: [Metrics.seq_cls_metric],
     Tasks.text_generation: [Metrics.text_gen_metric],
-    Tasks.image_denoise: [Metrics.image_denoise_metric],
-    Tasks.image_color_enhance: [Metrics.image_color_enhance_metric]
+    Tasks.image_denoising: [Metrics.image_denoise_metric],
+    Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric]
 }
@@ -17,7 +17,8 @@ logger = get_logger()
 __all__ = ['ImageColorEnhance']
-@MODELS.register_module(Tasks.image_color_enhance, module_name=Models.csrnet)
+@MODELS.register_module(
+    Tasks.image_color_enhancement, module_name=Models.csrnet)
 class ImageColorEnhance(TorchModel):
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -19,7 +19,7 @@ logger = get_logger()
 __all__ = ['NAFNetForImageDenoise']
-@MODELS.register_module(Tasks.image_denoise, module_name=Models.nafnet)
+@MODELS.register_module(Tasks.image_denoising, module_name=Models.nafnet)
 class NAFNetForImageDenoise(TorchModel):
     def __init__(self, model_dir: str, *args, **kwargs):
@@ -21,6 +21,7 @@ class OutputKeys(object):
     OUTPUT_IMG = 'output_img'
     OUTPUT_PCM = 'output_pcm'
     IMG_EMBEDDING = 'img_embedding'
+    SPO_LIST = 'spo_list'
     TEXT_EMBEDDING = 'text_embedding'
     TRANSLATION = 'translation'
     RESPONSE = 'response'
@@ -29,32 +30,21 @@ class OutputKeys(object):
     PROBABILITIES = 'probabilities'
     DIALOG_STATES = 'dialog_states'
     VIDEO_EMBEDDING = 'video_embedding'
+    UUID = 'uuid'
+    WORD = 'word'
+    KWS_LIST = 'kws_list'
 TASK_OUTPUTS = {
     # ============ vision tasks ===================
-    # image classification result for single sample
-    # {
-    #   "scores": [0.9, 0.1, 0.05, 0.05]
-    #   "labels": ["dog", "horse", "cow", "cat"],
-    # }
-    Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
-    Tasks.image_tagging: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # object detection result for single sample
-    # {
-    #   "scores": [0.9, 0.1, 0.05, 0.05]
-    #   "labels": ["dog", "horse", "cow", "cat"],
-    #   "boxes": [
-    #       [x1, y1, x2, y2],
-    #       [x1, y1, x2, y2],
-    #       [x1, y1, x2, y2],
-    #   ],
-    # }
-    Tasks.object_detection:
-    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
+    # ocr detection result for single sample
+    # {
+    #   "polygons": np.array with shape [num_text, 8], each polygon is
+    #       [x1, y1, x2, y2, x3, y3, x4, y4]
+    # }
+    Tasks.ocr_detection: [OutputKeys.POLYGONS],
     # face detection result for single sample
     # {
@@ -81,35 +71,79 @@ TASK_OUTPUTS = {
     # }
     Tasks.face_recognition: [OutputKeys.IMG_EMBEDDING],
+    # human detection result for single sample
+    # {
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    #   "labels": ["person", "person", "person", "person"],
+    #   "boxes": [
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #   ],
+    # }
+    #
+    Tasks.human_detection:
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
+    # face generation result for single sample
+    # {
+    #   "output_img": np.array with shape(h, w, 3)
+    # }
+    Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG],
+    # image classification result for single sample
+    # {
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    # }
+    Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
+    # object detection result for single sample
+    # {
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
+    #   "labels": ["dog", "horse", "cow", "cat"],
+    #   "boxes": [
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #       [x1, y1, x2, y2],
+    #   ],
+    # }
+    Tasks.image_object_detection:
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
     # instance segmentation result for single sample
     # {
     #   "scores": [0.9, 0.1, 0.05, 0.05],
     #   "labels": ["dog", "horse", "cow", "cat"],
-    #   "boxes": [
-    #       np.array in bgr channel order
+    #   "masks": [
+    #       np.array # 2D array containing only 0, 1
+    #   ]
     # }
     Tasks.image_segmentation:
-    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES],
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS],
-    # image generation/editing/matting result for single sample
+    # image matting result for single sample
     # {
     #   "output_img": np.array with shape(h, w, 4)
-    #      for matting or (h, w, 3) for general purpose
     # }
-    Tasks.image_editing: [OutputKeys.OUTPUT_IMG],
     Tasks.image_matting: [OutputKeys.OUTPUT_IMG],
-    Tasks.image_generation: [OutputKeys.OUTPUT_IMG],
-    Tasks.image_denoise: [OutputKeys.OUTPUT_IMG],
-    Tasks.image_colorization: [OutputKeys.OUTPUT_IMG],
-    Tasks.face_image_generation: [OutputKeys.OUTPUT_IMG],
+    Tasks.protrait_matting: [OutputKeys.OUTPUT_IMG],
+    # image editing task result for a single image
+    # {"output_img": np.array with shape (h, w, 3)}
+    Tasks.image_protrait_enhancement: [OutputKeys.OUTPUT_IMG],
+    Tasks.skin_retouching: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_super_resolution: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_colorization: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_color_enhancement: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_denoising: [OutputKeys.OUTPUT_IMG],
-    # action recognition result for single video
-    # {
-    #   "output_label": "abseiling"
-    # }
-    Tasks.action_recognition: [OutputKeys.LABELS],
+    # image generation task result for a single image
+    # {"output_img": np.array with shape (h, w, 3)}
+    Tasks.image_to_image_generation: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_to_image_translation: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_style_transfer: [OutputKeys.OUTPUT_IMG],
+    Tasks.image_portrait_stylization: [OutputKeys.OUTPUT_IMG],
     # live category recognition result for single video
     # {
@@ -117,28 +151,19 @@ TASK_OUTPUTS = {
     #   "labels": ['女装/女士精品>>棉衣/棉服', '女装/女士精品>>牛仔裤', '女装/女士精品>>裤子>>休闲裤'],
     # }
     Tasks.live_category: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # video category recognition result for single video
-    # {
-    #   "scores": [0.7716429233551025]
-    #   "labels": ['生活>>好物推荐'],
-    # }
-    Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # pose estimation result for single sample
+    # action recognition result for single video
     # {
-    #   "poses": np.array with shape [num_pose, num_keypoint, 3],
-    #       each keypoint is a array [x, y, score]
-    #   "boxes": np.array with shape [num_pose, 4], each box is
-    #       [x1, y1, x2, y2]
+    #   "output_label": "abseiling"
     # }
-    Tasks.pose_estimation: [OutputKeys.POSES, OutputKeys.BOXES],
+    Tasks.action_recognition: [OutputKeys.LABELS],
-    # ocr detection result for single sample
+    # video category recognition result for single video
     # {
-    #   "polygons": np.array with shape [num_text, 8], each polygon is
-    #       [x1, y1, x2, y2, x3, y3, x4, y4]
+    #   "scores": [0.7716429233551025]
+    #   "labels": ['生活>>好物推荐'],
     # }
-    Tasks.ocr_detection: [OutputKeys.POLYGONS],
+    Tasks.video_category: [OutputKeys.SCORES, OutputKeys.LABELS],
     # image embedding result for a single image
     # {
@@ -152,11 +177,11 @@ TASK_OUTPUTS = {
     # }
     Tasks.video_embedding: [OutputKeys.VIDEO_EMBEDDING],
-    # image_color_enhance result for a single sample
+    # virtual_try_on result for a single sample
     # {
-    #   "output_img": np.ndarray with shape [height, width, 3], uint8
+    #   "output_img": np.ndarray with shape [height, width, 3]
     # }
-    Tasks.image_color_enhance: [OutputKeys.OUTPUT_IMG],
+    Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG],
     # ============ nlp tasks ===================
@@ -167,33 +192,6 @@ TASK_OUTPUTS = {
     # }
     Tasks.text_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # text generation result for single sample
-    # {
-    #   "text": "this is the text generated by a model."
-    # }
-    Tasks.text_generation: [OutputKeys.TEXT],
-    # fill mask result for single sample
-    # {
-    #   "text": "this is the text which masks filled by model."
-    # }
-    Tasks.fill_mask: [OutputKeys.TEXT],
-    # word segmentation result for single sample
-    # {
-    #   "output": "今天 天气 不错 , 适合 出去 游玩"
-    # }
-    Tasks.word_segmentation: [OutputKeys.OUTPUT],
-    # named entity recognition result for single sample
-    # {
-    #   "output": [
-    #     {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"},
-    #     {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"}
-    #   ]
-    # }
-    Tasks.named_entity_recognition: [OutputKeys.OUTPUT],
     # sentence similarity result for single sample
     # {
     #   "scores": 0.9
@@ -201,11 +199,12 @@ TASK_OUTPUTS = {
     # }
     Tasks.sentence_similarity: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # translation result for a source sentence
+    # nli result for single sample
     # {
-    #   "translation": “北京是中国的首都”
+    #   "labels": ["happy", "sad", "calm", "angry"],
+    #   "scores": [0.9, 0.1, 0.05, 0.05]
     # }
-    Tasks.translation: [OutputKeys.TRANSLATION],
+    Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS],
     # sentiment classification result for single sample
     # {
@@ -221,14 +220,78 @@ TASK_OUTPUTS = {
     # }
     Tasks.zero_shot_classification: [OutputKeys.SCORES, OutputKeys.LABELS],
-    # nli result for single sample
-    # {
-    #   "labels": ["happy", "sad", "calm", "angry"],
-    #   "scores": [0.9, 0.1, 0.05, 0.05]
-    # }
-    Tasks.nli: [OutputKeys.SCORES, OutputKeys.LABELS],
+    # relation extraction result for a single sample
+    # {
+    #   "uuid": "人生信息-1",
+    #   "text": "《父老乡亲》是由中国人民解放军海政文工团创作的军旅歌曲,石顺义作词,王锡仁作曲,范琳琳演唱",
+    #   "spo_list": [{"subject": "石顺义", "predicate": "国籍", "object": "中国"}]
+    # }
+    Tasks.relation_extraction:
+    [OutputKeys.UUID, OutputKeys.TEXT, OutputKeys.SPO_LIST],
+    # translation result for a source sentence
+    # {
+    #   "translation": “北京是中国的首都”
+    # }
+    Tasks.translation: [OutputKeys.TRANSLATION],
+    # word segmentation result for single sample
+    # {
+    #   "output": "今天 天气 不错 , 适合 出去 游玩"
+    # }
+    Tasks.word_segmentation: [OutputKeys.OUTPUT],
+    # part-of-speech result for single sample
+    # [
+    #   {'word': '诸葛', 'label': 'PROPN'},
+    #   {'word': '亮', 'label': 'PROPN'},
+    #   {'word': '发明', 'label': 'VERB'},
+    #   {'word': '八', 'label': 'NUM'},
+    #   {'word': '阵', 'label': 'NOUN'},
+    #   {'word': '图', 'label': 'PART'},
+    #   {'word': '以', 'label': 'ADV'},
+    #   {'word': '利', 'label': 'VERB'},
+    #   {'word': '立营', 'label': 'VERB'},
+    #   {'word': '练兵', 'label': 'VERB'},
+    #   {'word': '.', 'label': 'PUNCT'}
+    # ]
+    # TODO @wenmeng.zwm support list of result check
+    Tasks.part_of_speech: [OutputKeys.WORD, OutputKeys.LABEL],
+    # named entity recognition result for single sample
+    # {
+    #   "output": [
+    #     {"type": "LOC", "start": 2, "end": 5, "span": "温岭市"},
+    #     {"type": "LOC", "start": 5, "end": 8, "span": "新河镇"}
+    #   ]
+    # }
+    Tasks.named_entity_recognition: [OutputKeys.OUTPUT],
-    # dialog intent prediction result for single sample
+    # text_error_correction result for a single sample
+    # {
+    #   "output": "我想吃苹果"
+    # }
+    Tasks.text_error_correction: [OutputKeys.OUTPUT],
+    # text generation result for single sample
+    # {
+    #   "text": "this is the text generated by a model."
+    # }
+    Tasks.text_generation: [OutputKeys.TEXT],
+    # text feature extraction for single sample
+    # {
+    #   "text_embedding": np.array with shape [1, D]
+    # }
+    Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING],
+    # fill mask result for single sample
+    # {
+    #   "text": "this is the text which masks filled by model."
+    # }
+    Tasks.fill_mask: [OutputKeys.TEXT],
+    # (Deprecated) dialog intent prediction result for single sample
     # {'pred': array([2.62349960e-03, 4.12110658e-03, 4.12748595e-05, 3.77560973e-05,
     #        1.08599677e-04, 1.72710388e-05, 2.95618793e-05, 1.93638436e-04,
     #        6.45841064e-05, 1.15997791e-04, 5.11605394e-05, 9.87020373e-01,
@@ -252,11 +315,11 @@ TASK_OUTPUTS = {
     Tasks.dialog_intent_prediction:
     [OutputKeys.PREDICTION, OutputKeys.LABEL_POS, OutputKeys.LABEL],
-    # dialog modeling prediction result for single sample
+    # (Deprecated) dialog modeling prediction result for single sample
     #   sys : ['you', 'are', 'welcome', '.', 'have', 'a', 'great', 'day', '!']
     Tasks.dialog_modeling: [OutputKeys.RESPONSE],
-    # dialog state tracking result for single sample
+    # (Deprecated) dialog state tracking result for single sample
     # {
     #     "dialog_states": {
     #         "taxi-leaveAt": "none",
@@ -294,6 +357,9 @@ TASK_OUTPUTS = {
     Tasks.dialog_state_tracking: [OutputKeys.DIALOG_STATES],
     # ============ audio tasks ===================
+    # asr result for single sample
+    # { "text": "每一天都要快乐喔"}
+    Tasks.auto_speech_recognition: [OutputKeys.TEXT],
     # audio processed for single file in PCM format
     # {
@@ -303,30 +369,19 @@ TASK_OUTPUTS = {
     Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM],
     Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM],
+    # text_to_speech result for a single sample
+    # {
+    #   "output_pcm": {"input_label" : np.ndarray with shape [D]}
+    # }
+    Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM],
     # ============ multi-modal tasks ===================
     # image caption result for single sample
     # {
     #   "caption": "this is an image caption text."
     # }
     Tasks.image_captioning: [OutputKeys.CAPTION],
-    # multi-modal embedding result for single sample
-    # {
-    #   "img_embedding": np.array with shape [1, D],
-    #   "text_embedding": np.array with shape [1, D]
-    # }
-    Tasks.multi_modal_embedding:
-    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING],
-    # generative multi-modal embedding result for single sample
-    # {
-    #   "img_embedding": np.array with shape [1, D],
-    #   "text_embedding": np.array with shape [1, D],
-    #   "caption": "this is an image caption text."
-    # }
-    Tasks.generative_multi_modal_embedding:
-    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION],
     # visual grounding result for single sample
     # {
@@ -350,25 +405,31 @@ TASK_OUTPUTS = {
-    #   "output_pcm": {"input_label" : np.ndarray with shape [D]}
-    # }
-    Tasks.text_to_speech: [OutputKeys.OUTPUT_PCM],
-    # virtual_try_on result for a single sample
-    # {
-    #   "output_img": np.ndarray with shape [height, width, 3]
-    # }
-    Tasks.virtual_try_on: [OutputKeys.OUTPUT_IMG],
-    # visual_question_answering result for a single sample
-    # {
-    #   "text": "this is the text generated by a model."
-    # }
+    # multi-modal embedding result for single sample
+    # {
+    #   "img_embedding": np.array with shape [1, D],
+    #   "text_embedding": np.array with shape [1, D]
+    # }
+    Tasks.multi_modal_embedding:
+    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING],
+    # generative multi-modal embedding result for single sample
+    # {
+    #   "img_embedding": np.array with shape [1, D],
+    #   "text_embedding": np.array with shape [1, D],
+    #   "caption": "this is an image caption text."
+    # }
+    Tasks.generative_multi_modal_embedding:
+    [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING, OutputKeys.CAPTION],
+    # VQA result for a sample
+    # {"text": "this is a text answer. "}
     Tasks.visual_question_answering: [OutputKeys.TEXT],
-    # auto_speech_recognition result for a single sample
-    # {
-    #   "text": "每天都要快乐喔"
-    # }
-    Tasks.auto_speech_recognition: [OutputKeys.TEXT],
-    # text_error_correction result for a single sample
     # {
-    #   "output": "我想吃苹果"
+    #   "scores": [0.9, 0.1, 0.1],
+    #   "labels": ["entailment", "contradiction", "neutral"]
     # }
-    Tasks.text_error_correction: [OutputKeys.OUTPUT]
+    Tasks.visual_entailment: [OutputKeys.SCORES, OutputKeys.LABELS],
 }
```
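Since `TASK_OUTPUTS` now declares the expected keys per task, a check along these lines can validate a pipeline result against the table. This is a hedged sketch: `validate_output` is a hypothetical helper, not something this change adds.

```python
from modelscope.outputs import TASK_OUTPUTS


def validate_output(task: str, output: dict) -> None:
    """Raise if `output` lacks any key that TASK_OUTPUTS declares for `task`."""
    expected = TASK_OUTPUTS.get(task)
    if expected is None:
        raise ValueError(f'no output definition registered for task: {task}')
    missing = [key for key in expected if key not in output]
    if missing:
        raise ValueError(f'task {task} output is missing keys: {missing}')
```

For example, `validate_output(Tasks.image_denoising, result)` should pass only when the result carries `output_img`.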
```diff
@@ -39,8 +39,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                              'damo/cv_resnet18_human-detection'),
     Tasks.image_object_detection: (Pipelines.object_detection,
                                    'damo/cv_vit_object-detection_coco'),
-    Tasks.image_denoise: (Pipelines.image_denoise,
-                          'damo/cv_nafnet_image-denoise_sidd'),
+    Tasks.image_denoising: (Pipelines.image_denoise,
+                            'damo/cv_nafnet_image-denoise_sidd'),
     Tasks.text_classification: (Pipelines.sentiment_analysis,
                                 'damo/bert-base-sst2'),
     Tasks.text_generation: (Pipelines.text_generation,
@@ -94,8 +94,9 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.video_multi_modal_embedding:
     (Pipelines.video_multi_modal_embedding,
      'damo/multi_modal_clip_vtretrival_msrvtt_53'),
-    Tasks.image_color_enhance: (Pipelines.image_color_enhance,
-                                'damo/cv_csrnet_image-color-enhance-models'),
+    Tasks.image_color_enhancement:
+    (Pipelines.image_color_enhance,
+     'damo/cv_csrnet_image-color-enhance-models'),
     Tasks.virtual_try_on: (Pipelines.virtual_try_on,
                            'damo/cv_daflow_virtual-try-on_base'),
     Tasks.image_colorization: (Pipelines.image_colorization,
@@ -18,7 +18,7 @@ logger = get_logger()
 @PIPELINES.register_module(
-    Tasks.image_color_enhance, module_name=Pipelines.image_color_enhance)
+    Tasks.image_color_enhancement, module_name=Pipelines.image_color_enhance)
 class ImageColorEnhancePipeline(Pipeline):
     def __init__(self,
@@ -19,7 +19,7 @@ __all__ = ['ImageDenoisePipeline']
 @PIPELINES.register_module(
-    Tasks.image_denoise, module_name=Pipelines.image_denoise)
+    Tasks.image_denoising, module_name=Pipelines.image_denoise)
 class ImageDenoisePipeline(Pipeline):
     def __init__(self,
@@ -15,6 +15,8 @@ from modelscope.utils.logger import get_logger
 logger = get_logger()
+@PIPELINES.register_module(
+    Tasks.protrait_matting, module_name=Pipelines.image_matting)
 @PIPELINES.register_module(
     Tasks.image_matting, module_name=Pipelines.image_matting)
 class ImageMattingPipeline(Pipeline):
@@ -34,7 +34,8 @@ def save_grid(imgs, filename, nrow=5):
 @PIPELINES.register_module(
-    Tasks.image_generation, module_name=Pipelines.image2image_translation)
+    Tasks.image_to_image_translation,
+    module_name=Pipelines.image2image_translation)
 class Image2ImageTranslationPipeline(Pipeline):
     def __init__(self, model: str, **kwargs):
@@ -34,7 +34,8 @@ def is_official_hub_path(path: Union[str, List],
         try:
             _ = HubApi().get_model(path, revision=revision)
             return True
-        except Exception:
+        except Exception as e:
+            logger.warning(f'get model exception: {e}')
             return False
     if isinstance(path, str):
@@ -14,42 +14,60 @@ class Fields(object):
 class CVTasks(object):
-    # vision tasks
-    image_to_text = 'image-to-text'
-    pose_estimation = 'pose-estimation'
+    # ocr
+    ocr_detection = 'ocr-detection'
+    ocr_recognition = 'ocr-recognition'
+    # human face body related
+    face_detection = 'face-detection'
+    face_recognition = 'face-recognition'
+    human_detection = 'human-detection'
+    human_object_interaction = 'human-object-interaction'
+    face_image_generation = 'face-image-generation'
     image_classification = 'image-classification'
-    image_tagging = 'image-tagging'
-    object_detection = 'object-detection'
-    human_detection = 'human-detection'
+    image_multilabel_classification = 'image-multilabel-classification'
+    image_classification_imagenet = 'image-classification-imagenet'
+    image_classification_dailylife = 'image-classification-dailylife'
+    image_object_detection = 'image-object-detection'
     image_segmentation = 'image-segmentation'
-    image_editing = 'image-editing'
-    image_generation = 'image-generation'
     image_matting = 'image-matting'
-    image_denoise = 'image-denoise'
-    ocr_detection = 'ocr-detection'
-    action_recognition = 'action-recognition'
-    video_embedding = 'video-embedding'
-    face_detection = 'face-detection'
-    face_recognition = 'face-recognition'
-    image_color_enhance = 'image-color-enhance'
-    virtual_try_on = 'virtual-try-on'
+    protrait_matting = 'protrait-matting'
+    # image editing
+    image_protrait_enhancement = 'image-protrait-enhancement'
+    skin_retouching = 'skin-retouching'
+    image_super_resolution = 'image-super-resolution'
     image_colorization = 'image-colorization'
-    face_image_generation = 'face-image-generation'
-    image_super_resolution = 'image-super-resolution'
+    image_color_enhancement = 'image-color-enhancement'
+    image_denoising = 'image-denoising'
+    # image generation
+    image_to_image_translation = 'image-to-image-translation'
+    image_to_image_generation = 'image-to-image-generation'
+    image_style_transfer = 'image-style-transfer'
+    image_portrait_stylization = 'image-portrait-stylization'
     image_embedding = 'image-embedding'
     product_retrieval_embedding = 'product-retrieval-embedding'
+    # video recognition
     live_category = 'live-category'
+    action_recognition = 'action-recognition'
     video_category = 'video-category'
-    image_classification_imagenet = 'image-classification-imagenet'
-    image_classification_dailylife = 'image-classification-dailylife'
-    image_portrait_stylization = 'image-portrait-stylization'
-    image_to_image_generation = 'image-to-image-generation'
+    video_embedding = 'video-embedding'
+    virtual_try_on = 'virtual-try-on'
 class NLPTasks(object):
     # nlp tasks
     word_segmentation = 'word-segmentation'
+    part_of_speech = 'part-of-speech'
     named_entity_recognition = 'named-entity-recognition'
     nli = 'nli'
     sentiment_classification = 'sentiment-classification'
@@ -66,7 +84,7 @@ class NLPTasks(object):
     dialog_intent_prediction = 'dialog-intent-prediction'
     dialog_state_tracking = 'dialog-state-tracking'
     table_question_answering = 'table-question-answering'
-    feature_extraction = 'feature-extraction'
+    sentence_embedding = 'sentence-embedding'
     fill_mask = 'fill-mask'
     summarization = 'summarization'
     question_answering = 'question-answering'
```
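Taken together, the task renames in this change boil down to the following old-to-new mapping, derived from the hunks above. The dict is illustrative, not part of the library:

```python
# Old task string -> new task string, as implied by this diff.
RENAMED_TASKS = {
    'image-denoise': 'image-denoising',
    'image-color-enhance': 'image-color-enhancement',
    'object-detection': 'image-object-detection',
    'image-generation': 'image-to-image-translation',  # image2image pipeline
    'feature-extraction': 'sentence-embedding',
}
# Not strict renames: the matting pipeline additionally registers under
# 'protrait-matting' (sic), and the custom pipeline tests move from
# 'image-tagging' to 'image-classification'.
```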
```diff
@@ -21,7 +21,7 @@ logger = get_logger()
 @PIPELINES.register_module(
-    group_key=Tasks.image_tagging, module_name='custom_single_model')
+    group_key=Tasks.image_classification, module_name='custom_single_model')
 class CustomSingleModelPipeline(Pipeline):
     def __init__(self,
@@ -38,7 +38,7 @@ class CustomSingleModelPipeline(Pipeline):
 @PIPELINES.register_module(
-    group_key=Tasks.image_tagging, module_name='model1_model2')
+    group_key=Tasks.image_classification, module_name='model1_model2')
 class CustomMultiModelPipeline(Pipeline):
     def __init__(self,
@@ -64,7 +64,7 @@ class PipelineInterfaceTest(unittest.TestCase):
         cfg_file = os.path.join(dirname, ModelFile.CONFIGURATION)
         cfg = {
             ConfigFields.framework: Frameworks.torch,
-            ConfigFields.task: Tasks.image_tagging,
+            ConfigFields.task: Tasks.image_classification,
             ConfigFields.pipeline: {
                 'type': pipeline_name,
             }
@@ -77,12 +77,13 @@ class PipelineInterfaceTest(unittest.TestCase):
         self.prepare_dir('/tmp/model2', 'model1_model2')
     def test_single_model(self):
-        pipe = pipeline(Tasks.image_tagging, model='/tmp/custom_single_model')
+        pipe = pipeline(
+            Tasks.image_classification, model='/tmp/custom_single_model')
         assert isinstance(pipe, CustomSingleModelPipeline)
     def test_multi_model(self):
         pipe = pipeline(
-            Tasks.image_tagging, model=['/tmp/model1', '/tmp/model2'])
+            Tasks.image_classification, model=['/tmp/model1', '/tmp/model2'])
         assert isinstance(pipe, CustomMultiModelPipeline)
@@ -24,7 +24,7 @@ class Image2ImageTranslationTest(unittest.TestCase):
         just like the following code.
         """
         img2img_gen_pipeline = pipeline(
-            Tasks.image_generation,
+            Tasks.image_to_image_translation,
             model='damo/cv_latent_diffusion_image2image_translation')
         result = img2img_gen_pipeline(
             ('data/test/images/img2img_input_mask.png',
@@ -27,13 +27,13 @@ class ImageColorEnhanceTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_modelhub(self):
         img_color_enhance = pipeline(
-            Tasks.image_color_enhance, model=self.model_id)
+            Tasks.image_color_enhancement, model=self.model_id)
         self.pipeline_inference(img_color_enhance,
                                 'data/test/images/image_color_enhance.png')
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
-        img_color_enhance = pipeline(Tasks.image_color_enhance)
+        img_color_enhance = pipeline(Tasks.image_color_enhancement)
         self.pipeline_inference(img_color_enhance,
                                 'data/test/images/image_color_enhance.png')
@@ -30,7 +30,7 @@ class ImageDenoiseTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_with_model_from_modelhub(self):
         model = Model.from_pretrained(self.model_id)
-        pipeline_ins = pipeline(task=Tasks.image_denoise, model=model)
+        pipeline_ins = pipeline(task=Tasks.image_denoising, model=model)
         denoise_img = pipeline_ins(
             input=self.demo_image_path)[OutputKeys.OUTPUT_IMG]
         denoise_img = Image.fromarray(denoise_img)
@@ -39,7 +39,8 @@ class ImageDenoiseTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_model_name(self):
-        pipeline_ins = pipeline(task=Tasks.image_denoise, model=self.model_id)
+        pipeline_ins = pipeline(
+            task=Tasks.image_denoising, model=self.model_id)
         denoise_img = pipeline_ins(
             input=self.demo_image_path)[OutputKeys.OUTPUT_IMG]
         denoise_img = Image.fromarray(denoise_img)
@@ -48,7 +49,7 @@ class ImageDenoiseTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_default_model(self):
-        pipeline_ins = pipeline(task=Tasks.image_denoise)
+        pipeline_ins = pipeline(task=Tasks.image_denoising)
         denoise_img = pipeline_ins(
             input=self.demo_image_path)[OutputKeys.OUTPUT_IMG]
         denoise_img = Image.fromarray(denoise_img)
@@ -26,7 +26,7 @@ class ImageMattingTest(unittest.TestCase):
         model_file = osp.join(tmp_dir, ModelFile.TF_GRAPH_FILE)
         with open(model_file, 'wb') as ofile:
             ofile.write(File.read(model_path))
-        img_matting = pipeline(Tasks.image_matting, model=tmp_dir)
+        img_matting = pipeline(Tasks.protrait_matting, model=tmp_dir)
         result = img_matting('data/test/images/image_matting.png')
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
@@ -38,7 +38,7 @@ class ImageMattingTest(unittest.TestCase):
         # input_location = '/dir/to/images'
         dataset = MsDataset.load(input_location, target='image')
-        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+        img_matting = pipeline(Tasks.protrait_matting, model=self.model_id)
         # note that for dataset output, the inference-output is a Generator that can be iterated.
         result = img_matting(dataset)
         cv2.imwrite('result.png', next(result)[OutputKeys.OUTPUT_IMG])
@@ -46,7 +46,7 @@ class ImageMattingTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_modelhub(self):
-        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+        img_matting = pipeline(Tasks.protrait_matting, model=self.model_id)
         result = img_matting('data/test/images/image_matting.png')
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
@@ -54,7 +54,7 @@ class ImageMattingTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
-        img_matting = pipeline(Tasks.image_matting)
+        img_matting = pipeline(Tasks.protrait_matting)
         result = img_matting('data/test/images/image_matting.png')
         cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG])
@@ -67,7 +67,7 @@ class ImageMattingTest(unittest.TestCase):
             namespace='damotest',
             split='test',
             target='file')
-        img_matting = pipeline(Tasks.image_matting, model=self.model_id)
+        img_matting = pipeline(Tasks.protrait_matting, model=self.model_id)
         result = img_matting(dataset)
         for i in range(2):
             cv2.imwrite(f'result_{i}.png', next(result)[OutputKeys.OUTPUT_IMG])
@@ -42,12 +42,13 @@ class RegistryTest(unittest.TestCase):
             MODELS.get('Bert', Tasks.sentiment_analysis) is
             BertForSentimentAnalysis)
-        @MODELS.register_module(Tasks.object_detection)
+        @MODELS.register_module(Tasks.image_object_detection)
         class DETR(object):
             pass
-        self.assertTrue(Tasks.object_detection in MODELS.modules)
-        self.assertTrue(MODELS.get('DETR', Tasks.object_detection) is DETR)
+        self.assertTrue(Tasks.image_object_detection in MODELS.modules)
+        self.assertTrue(
+            MODELS.get('DETR', Tasks.image_object_detection) is DETR)
         self.assertEqual(len(MODELS.modules), 4)
```
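As an end-to-end sanity check of what the registry test above asserts, something like the following should hold after the change. This is a sketch mirroring the test: `ToyDetector` and the 'toy' module name are made up, and the registry import path is assumed from the repository layout.

```python
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks


# Register a stand-in model under the renamed detection task key...
@MODELS.register_module(Tasks.image_object_detection, module_name='toy')
class ToyDetector:
    pass


# ...then resolve it back through the same group key, as the test does.
assert Tasks.image_object_detection in MODELS.modules
assert MODELS.get('toy', Tasks.image_object_detection) is ToyDetector
```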