
adjust input and output format for demo service

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10873454
Ref: master^2
Authors: shuying.shu, yingda.chen (3 years ago)
Parent commit: 6baf602bc2

3 changed files with 29 additions and 13 deletions
  1. modelscope/outputs/outputs.py (+7, -4)
  2. modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py (+20, -5)
  3. tests/pipelines/test_referring_video_object_segmentation.py (+2, -4)

modelscope/outputs/outputs.py (+7, -4)

@@ -435,9 +435,11 @@ TASK_OUTPUTS = {
 
     # referring video object segmentation result for a single video
     # {
-    #   "masks": [np.array # 2D array with shape [height, width]]
+    #   "masks": [np.array # 3D array with shape [frame_num, height, width]]
+    #   "timestamps": ["hh:mm:ss", "hh:mm:ss", "hh:mm:ss"]
     # }
-    Tasks.referring_video_object_segmentation: [OutputKeys.MASKS],
+    Tasks.referring_video_object_segmentation:
+        [OutputKeys.MASKS, OutputKeys.TIMESTAMPS],
 
     # ============ nlp tasks ===================
 
@@ -698,8 +700,9 @@ TASK_OUTPUTS = {
     #     "img_embedding": np.array with shape [1, D],
     #     "text_embedding": np.array with shape [1, D]
     # }
-    Tasks.multi_modal_embedding:
-        [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING],
+    Tasks.multi_modal_embedding: [
+        OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING
+    ],
 
     # generative multi-modal embedding result for single sample
     # {
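
For reference, a minimal sketch of the result dict the demo service now receives from the referring video object segmentation task, assuming OutputKeys.MASKS == 'masks' and OutputKeys.TIMESTAMPS == 'timestamps' (shapes follow the updated comment; the concrete numbers are illustrative):

    import numpy as np

    result = {
        # one 3D mask per text query, shaped [frame_num, height, width]
        'masks': [np.zeros((120, 360, 640), dtype=np.uint8)],
        # one formatted timestamp per frame, e.g. at 25 fps (first three shown)
        'timestamps': ['00:00:00.000', '00:00:00.040', '00:00:00.080'],
    }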


modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py (+20, -5)

@@ -52,17 +52,16 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
         """
         assert isinstance(input, tuple) and len(
             input
-        ) == 4, 'error - input type must be tuple and input length must be 4'
-        self.input_video_pth, text_queries, start_pt, end_pt = input
+        ) == 2, 'error - input type must be tuple and input length must be 2'
+        self.input_video_pth, text_queries = input
 
-        assert 0 < end_pt - start_pt <= 10, 'error - the subclip length must be 0-10 seconds long'
         assert 1 <= len(
             text_queries) <= 2, 'error - 1-2 input text queries are expected'
 
         # extract the relevant subclip:
         self.input_clip_pth = 'input_clip.mp4'
         with VideoFileClip(self.input_video_pth) as video:
-            subclip = video.subclip(start_pt, end_pt)
+            subclip = video.subclip()
             subclip.write_videofile(self.input_clip_pth)
 
         self.window_length = 24  # length of window during inference
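
Note that video.subclip() is now called with no arguments. In moviepy, subclip(t_start=0, t_end=None) defaults to the whole clip, so the pipeline now processes the full input video rather than a caller-specified 0-10 second window. A minimal sketch of that behavior (the file name is a placeholder):

    from moviepy.editor import VideoFileClip

    with VideoFileClip('demo_video.mp4') as video:  # hypothetical input
        clip = video.subclip()  # t_start=0, t_end=None -> entire clip
        assert clip.duration == video.duration
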
@@ -191,7 +190,16 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
             output_clip_path, fps=self.meta['video_fps'], audio=True)
         del masked_video
 
-        result = {OutputKeys.MASKS: inputs}
+        masks = [mask.squeeze(1) for mask in inputs]
+
+        fps = self.meta['video_fps']
+        output_timestamps = []
+        for frame_idx in range(self.video.shape[0]):
+            output_timestamps.append(timestamp_format(seconds=frame_idx / fps))
+        result = {
+            OutputKeys.MASKS: masks,
+            OutputKeys.TIMESTAMPS: output_timestamps
+        }
         return result
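
On the new masks line: each entry of inputs here presumably carries a singleton channel axis, i.e. shape [frame_num, 1, height, width], and squeeze(1) drops it to match the documented [frame_num, height, width] output. A sketch under that assumption:

    import numpy as np

    pred = np.zeros((120, 1, 360, 640))  # hypothetical per-query prediction
    assert pred.squeeze(1).shape == (120, 360, 640)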


@@ -201,3 +209,10 @@ def apply_mask(image, mask, color, transparency=0.7):
     color_matrix = np.ones(image.shape, dtype=np.float) * color
     out_image = color_matrix * mask + image * (1.0 - mask)
     return out_image
+
+
+def timestamp_format(seconds):
+    m, s = divmod(seconds, 60)
+    h, m = divmod(m, 60)
+    time = '%02d:%02d:%06.3f' % (h, m, s)
+    return time
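
A quick worked example of the new helper: divmod splits seconds into minutes and a remainder, then minutes into hours, and '%02d:%02d:%06.3f' zero-pads the result to millisecond precision:

    assert timestamp_format(0.0) == '00:00:00.000'
    assert timestamp_format(65.04) == '00:01:05.040'
    assert timestamp_format(3723.5) == '01:02:03.500'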

tests/pipelines/test_referring_video_object_segmentation.py (+2, -4)

@@ -21,8 +21,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
             'guy in black performing tricks on a bike',
             'a black bike used to perform tricks'
         ]
-        start_pt, end_pt = 4, 14
-        input_tuple = (input_location, text_queries, start_pt, end_pt)
+        input_tuple = (input_location, text_queries)
         pp = pipeline(
             Tasks.referring_video_object_segmentation, model=self.model_id)
         result = pp(input_tuple)
@@ -38,8 +37,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
             'guy in black performing tricks on a bike',
             'a black bike used to perform tricks'
         ]
-        start_pt, end_pt = 4, 14
-        input_tuple = (input_location, text_queries, start_pt, end_pt)
+        input_tuple = (input_location, text_queries)
         pp = pipeline(Tasks.referring_video_object_segmentation)
         result = pp(input_tuple)
         if result:
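
Putting the pieces together, a minimal usage sketch of the new two-element input convention (the video path is a placeholder; the output keys again assume OutputKeys.MASKS == 'masks' and OutputKeys.TIMESTAMPS == 'timestamps'):

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    pp = pipeline(Tasks.referring_video_object_segmentation)
    result = pp(('demo_video.mp4',  # hypothetical local video path
                 ['guy in black performing tricks on a bike']))
    print(result['masks'][0].shape)  # (frame_num, height, width)
    print(result['timestamps'][0])   # '00:00:00.000'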

