|
- # Copyright (c) Alibaba, Inc. and its affiliates.
-
- from modelscope.utils.constant import Tasks
-
# Registry of output keys: each entry pairs a member of ``Tasks`` with the
# list of field names declared for that task's single-sample result.  The
# commented dict preceding an entry is an illustrative example of a result.
TASK_OUTPUTS = {

    # ============ vision tasks ===================

    # Image classification, one sample (image tagging is declared with the
    # same keys):
    #   {
    #       "labels": ["dog", "horse", "cow", "cat"],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    Tasks.image_classification: ['scores', 'labels'],
    Tasks.image_tagging: ['scores', 'labels'],

    # Object detection, one sample:
    #   {
    #       "boxes": [
    #           [x1, y1, x2, y2],
    #           [x1, y1, x2, y2],
    #           [x1, y1, x2, y2],
    #       ],
    #       "labels": ["dog", "horse", "cow", "cat"],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    Tasks.object_detection: ['scores', 'labels', 'boxes'],

    # Instance segmentation, one sample:
    #   {
    #       "masks": [
    #           np.array in bgr channel order
    #       ],
    #       "labels": ["dog", "horse", "cow", "cat"],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    # NOTE(review): the example documents a "masks" field, yet the keys
    # declared below list 'boxes' and no 'masks' -- confirm which of the two
    # the segmentation pipelines actually return.
    Tasks.image_segmentation: ['scores', 'labels', 'boxes'],

    # Image generation / editing / matting, one sample:
    #   {
    #       "output_png": np.array with shape (h, w, 4) for matting,
    #                     or (h, w, 3) for general purpose
    #   }
    Tasks.image_editing: ['output_png'],
    Tasks.image_matting: ['output_png'],
    Tasks.image_generation: ['output_png'],

    # Action recognition, one video:
    #   {
    #       "output_label": "abseiling"
    #   }
    Tasks.action_recognition: ['output_label'],

    # Pose estimation, one sample:
    #   {
    #       "poses": np.array with shape [num_pose, num_keypoint, 3],
    #                each keypoint is an array [x, y, score]
    #       "boxes": np.array with shape [num_pose, 4],
    #                each box is [x1, y1, x2, y2]
    #   }
    Tasks.pose_estimation: ['poses', 'boxes'],

    # OCR detection, one sample:
    #   {
    #       "det_polygons": np.array with shape [num_text, 8],
    #                       each box is [x1, y1, x2, y2, x3, y3, x4, y4]
    #   }
    Tasks.ocr_detection: ['det_polygons'],

    # ============ nlp tasks ===================

    # Text classification, one sample:
    #   {
    #       "labels": ["happy", "sad", "calm", "angry"],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    Tasks.text_classification: ['scores', 'labels'],

    # Text generation, one sample:
    #   {
    #       "text": "this is text generated by a model."
    #   }
    Tasks.text_generation: ['text'],

    # Fill mask, one sample:
    #   {
    #       "text": "this is the text which masks filled by model."
    #   }
    Tasks.fill_mask: ['text'],

    # Word segmentation, one sample (the example shows the segmented words
    # separated by single spaces):
    #   {
    #       "output": "今天 天气 不错 , 适合 出去 游玩"
    #   }
    Tasks.word_segmentation: ['output'],

    # Sentence similarity, one sample:
    #   {
    #       "labels": "1",
    #       "scores": 0.9
    #   }
    Tasks.sentence_similarity: ['scores', 'labels'],

    # Sentiment classification, one sample:
    #   {
    #       "labels": ["happy", "sad", "calm", "angry"],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    Tasks.sentiment_classification: ['scores', 'labels'],

    # NLI, one sample:
    #   {
    #       "labels": ["happy", "sad", "calm", "angry"],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    Tasks.nli: ['scores', 'labels'],

    # ============ audio tasks ===================

    # Processed audio for one file, in PCM format:
    #   {
    #       "output_pcm": np.array with shape (samples,) and dtype float32
    #   }
    Tasks.speech_signal_process: ['output_pcm'],

    # ============ multi-modal tasks ===================

    # Image captioning, one sample:
    #   {
    #       "caption": "this is an image caption text."
    #   }
    Tasks.image_captioning: ['caption'],

    # Multi-modal embedding, one sample:
    #   {
    #       "img_embedding": np.array with shape [1, D],
    #       "text_embedding": np.array with shape [1, D]
    #   }
    Tasks.multi_modal_embedding: ['img_embedding', 'text_embedding'],

    # Visual grounding, one sample:
    #   {
    #       "boxes": [
    #           [x1, y1, x2, y2],
    #           [x1, y1, x2, y2],
    #           [x1, y1, x2, y2],
    #       ],
    #       "scores": [0.9, 0.1, 0.05, 0.05]
    #   }
    Tasks.visual_grounding: ['boxes', 'scores'],

    # Text-to-image synthesis, one sample:
    #   {
    #       "image": np.ndarray with shape [height, width, 3]
    #   }
    Tasks.text_to_image_synthesis: ['image'],
}
|