Browse Source

adjust input and output format for demo service

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10873454
master^2
shuying.shu yingda.chen 3 years ago
parent
commit
6baf602bc2
3 changed files with 29 additions and 13 deletions
  1. +7
    -4
      modelscope/outputs/outputs.py
  2. +20
    -5
      modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py
  3. +2
    -4
      tests/pipelines/test_referring_video_object_segmentation.py

+ 7
- 4
modelscope/outputs/outputs.py View File

@@ -435,9 +435,11 @@ TASK_OUTPUTS = {


# referring video object segmentation result for a single video # referring video object segmentation result for a single video
# { # {
# "masks": [np.array # 2D array with shape [height, width]]
# "masks": [np.array # 3D array with shape [frame_num, height, width]]
# "timestamps": ["hh:mm:ss.mmm", "hh:mm:ss.mmm", ...]  # one per frame
# } # }
Tasks.referring_video_object_segmentation: [OutputKeys.MASKS],
Tasks.referring_video_object_segmentation:
[OutputKeys.MASKS, OutputKeys.TIMESTAMPS],


# ============ nlp tasks =================== # ============ nlp tasks ===================


@@ -698,8 +700,9 @@ TASK_OUTPUTS = {
# "img_embedding": np.array with shape [1, D], # "img_embedding": np.array with shape [1, D],
# "text_embedding": np.array with shape [1, D] # "text_embedding": np.array with shape [1, D]
# } # }
Tasks.multi_modal_embedding:
[OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING],
Tasks.multi_modal_embedding: [
OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING
],


# generative multi-modal embedding result for single sample # generative multi-modal embedding result for single sample
# { # {


+ 20
- 5
modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py View File

@@ -52,17 +52,16 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
""" """
assert isinstance(input, tuple) and len( assert isinstance(input, tuple) and len(
input input
) == 4, 'error - input type must be tuple and input length must be 4'
self.input_video_pth, text_queries, start_pt, end_pt = input
) == 2, 'error - input type must be tuple and input length must be 2'
self.input_video_pth, text_queries = input


assert 0 < end_pt - start_pt <= 10, 'error - the subclip length must be 0-10 seconds long'
assert 1 <= len( assert 1 <= len(
text_queries) <= 2, 'error - 1-2 input text queries are expected' text_queries) <= 2, 'error - 1-2 input text queries are expected'


# extract the relevant subclip: # extract the relevant subclip:
self.input_clip_pth = 'input_clip.mp4' self.input_clip_pth = 'input_clip.mp4'
with VideoFileClip(self.input_video_pth) as video: with VideoFileClip(self.input_video_pth) as video:
subclip = video.subclip(start_pt, end_pt)
subclip = video.subclip()
subclip.write_videofile(self.input_clip_pth) subclip.write_videofile(self.input_clip_pth)


self.window_length = 24 # length of window during inference self.window_length = 24 # length of window during inference
@@ -191,7 +190,16 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
output_clip_path, fps=self.meta['video_fps'], audio=True) output_clip_path, fps=self.meta['video_fps'], audio=True)
del masked_video del masked_video


result = {OutputKeys.MASKS: inputs}
masks = [mask.squeeze(1) for mask in inputs]

fps = self.meta['video_fps']
output_timestamps = []
for frame_idx in range(self.video.shape[0]):
output_timestamps.append(timestamp_format(seconds=frame_idx / fps))
result = {
OutputKeys.MASKS: masks,
OutputKeys.TIMESTAMPS: output_timestamps
}
return result return result




@@ -201,3 +209,10 @@ def apply_mask(image, mask, color, transparency=0.7):
color_matrix = np.ones(image.shape, dtype=np.float) * color color_matrix = np.ones(image.shape, dtype=np.float) * color
out_image = color_matrix * mask + image * (1.0 - mask) out_image = color_matrix * mask + image * (1.0 - mask)
return out_image return out_image


def timestamp_format(seconds):
    """Render a duration in seconds as an ``HH:MM:SS.mmm`` timestamp string.

    Args:
        seconds (float): Non-negative duration in seconds (fractions kept
            to millisecond precision).

    Returns:
        str: Zero-padded timestamp, e.g. ``'01:01:01.500'``.
    """
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return '%02d:%02d:%06.3f' % (hours, minutes, secs)

+ 2
- 4
tests/pipelines/test_referring_video_object_segmentation.py View File

@@ -21,8 +21,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
'guy in black performing tricks on a bike', 'guy in black performing tricks on a bike',
'a black bike used to perform tricks' 'a black bike used to perform tricks'
] ]
start_pt, end_pt = 4, 14
input_tuple = (input_location, text_queries, start_pt, end_pt)
input_tuple = (input_location, text_queries)
pp = pipeline( pp = pipeline(
Tasks.referring_video_object_segmentation, model=self.model_id) Tasks.referring_video_object_segmentation, model=self.model_id)
result = pp(input_tuple) result = pp(input_tuple)
@@ -38,8 +37,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
'guy in black performing tricks on a bike', 'guy in black performing tricks on a bike',
'a black bike used to perform tricks' 'a black bike used to perform tricks'
] ]
start_pt, end_pt = 4, 14
input_tuple = (input_location, text_queries, start_pt, end_pt)
input_tuple = (input_location, text_queries)
pp = pipeline(Tasks.referring_video_object_segmentation) pp = pipeline(Tasks.referring_video_object_segmentation)
result = pp(input_tuple) result = pp(input_tuple)
if result: if result:


Loading…
Cancel
Save