diff --git a/modelscope/metrics/video_summarization_metric.py b/modelscope/metrics/video_summarization_metric.py index d1867600..40580382 100644 --- a/modelscope/metrics/video_summarization_metric.py +++ b/modelscope/metrics/video_summarization_metric.py @@ -1,3 +1,6 @@ +# Part of the implementation is borrowed and modified from PGL-SUM, +# publicly available at https://github.com/e-apostolidis/PGL-SUM + from typing import Dict import numpy as np diff --git a/modelscope/models/cv/video_summarization/__init__.py b/modelscope/models/cv/video_summarization/__init__.py index 064110f7..15ad61b4 100644 --- a/modelscope/models/cv/video_summarization/__init__.py +++ b/modelscope/models/cv/video_summarization/__init__.py @@ -1 +1,22 @@ -from .summarizer import PGLVideoSummarization +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .summarizer import (PGLVideoSummarization, summary_format) + +else: + _import_structure = { + 'summarizer': ['PGLVideoSummarization', 'summary_format'] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/video_summarization/base_model.py b/modelscope/models/cv/video_summarization/base_model.py index 670da251..912ba68d 100644 --- a/modelscope/models/cv/video_summarization/base_model.py +++ b/modelscope/models/cv/video_summarization/base_model.py @@ -1,4 +1,5 @@ -# The implementation is based on pytorch-caffe-models, available at https://github.com/crowsonkb/pytorch-caffe-models. 
+# Part of the implementation is borrowed and modified from pytorch-caffe-models, +# publicly available at https://github.com/crowsonkb/pytorch-caffe-models import cv2 import numpy as np diff --git a/modelscope/models/cv/video_summarization/kts/cpd_auto.py b/modelscope/models/cv/video_summarization/kts/cpd_auto.py index a794ca26..58281df8 100644 --- a/modelscope/models/cv/video_summarization/kts/cpd_auto.py +++ b/modelscope/models/cv/video_summarization/kts/cpd_auto.py @@ -1,4 +1,5 @@ -# The implementation is based on KTS, available at https://github.com/TatsuyaShirakawa/KTS. +# Part of the implementation is borrowed and modified from KTS, +# publicly available at https://github.com/TatsuyaShirakawa/KTS import numpy as np diff --git a/modelscope/models/cv/video_summarization/kts/cpd_nonlin.py b/modelscope/models/cv/video_summarization/kts/cpd_nonlin.py index ef2eb6ef..55e279e9 100644 --- a/modelscope/models/cv/video_summarization/kts/cpd_nonlin.py +++ b/modelscope/models/cv/video_summarization/kts/cpd_nonlin.py @@ -1,4 +1,5 @@ -# The implementation is based on KTS, available at https://github.com/TatsuyaShirakawa/KTS. +# Part of the implementation is borrowed and modified from KTS, +# publicly available at https://github.com/TatsuyaShirakawa/KTS import numpy as np diff --git a/modelscope/models/cv/video_summarization/pgl_sum.py b/modelscope/models/cv/video_summarization/pgl_sum.py index ab3010c9..2d27501d 100644 --- a/modelscope/models/cv/video_summarization/pgl_sum.py +++ b/modelscope/models/cv/video_summarization/pgl_sum.py @@ -1,4 +1,5 @@ -# The implementation is based on PGL-SUM, available at https://github.com/e-apostolidis/PGL-SUM. 
+# Part of the implementation is borrowed and modified from PGL-SUM,
+# publicly available at https://github.com/e-apostolidis/PGL-SUM
 
 import math
 
diff --git a/modelscope/models/cv/video_summarization/summarizer.py b/modelscope/models/cv/video_summarization/summarizer.py
index c95da025..75251989 100644
--- a/modelscope/models/cv/video_summarization/summarizer.py
+++ b/modelscope/models/cv/video_summarization/summarizer.py
@@ -1,4 +1,5 @@
-# The implementation is based on PGL-SUM, available at https://github.com/e-apostolidis/PGL-SUM.
+# Part of the implementation is borrowed and modified from PGL-SUM,
+# publicly available at https://github.com/e-apostolidis/PGL-SUM
 
 import os.path as osp
 from copy import deepcopy
@@ -23,7 +24,8 @@ logger = get_logger()
 def get_change_points(video_feat, n_frame):
     video_feat = np.array(video_feat, np.float32)
     K = np.dot(video_feat, video_feat.T)
-    change_points, _ = cpd_auto(K, ncp=120, vmax=2.2 / 4.0, lmin=1)
+    change_points, _ = cpd_auto(
+        K, ncp=min(K.shape[0] - 1, 120), vmax=2.2 / 4.0, lmin=1)
     change_points = change_points * 15
     change_points = np.concatenate(([0], change_points, [n_frame - 1]))
 
@@ -135,6 +137,61 @@ def generate_summary(all_shot_bound, all_scores, all_nframes, all_positions):
     return all_summaries
 
 
+def transform_time(seconds):
+    """Format a duration in seconds as an ``HH:MM:SS.mmm`` string."""
+    m, s = divmod(seconds, 60)
+    h, m = divmod(m, 60)
+    time = '%02d:%02d:%06.3f' % (h, m, s)
+    return time
+
+
+def summary_format(summary, fps):
+    """Convert a per-frame selection mask into a list of summary segments.
+
+    Args:
+        summary: sequence of per-frame flags; truthy entries mark frames
+            that belong to the summary.
+        fps: frame rate used to convert frame indices to timestamps;
+            must be non-zero.
+
+    Returns:
+        A list of dicts, one per run of consecutive selected frames, each
+        with 'frame' ([start_frame, end_frame], inclusive indices) and
+        'timestamps' (the same bounds formatted by transform_time).
+    """
+    frames_list = []
+    start_frame = -1
+    end_frame = -1
+    is_summary_frame = False
+    for i, idx in enumerate(summary):
+        if idx:
+            if is_summary_frame is False:
+                start_frame = i
+                is_summary_frame = True
+        else:
+            if is_summary_frame:
+                end_frame = i - 1
+                frames_list.append([start_frame, end_frame])
+                is_summary_frame = False
+
+    # Close a segment that is still open when the video ends.
+    if is_summary_frame:
+        end_frame = len(summary) - 1
+        frames_list.append([start_frame, end_frame])
+
+    output = []
+    for seg in frames_list:
+        output.append({
+            'frame':
+            seg,
+            'timestamps': [
+                transform_time(seg[0] / float(fps)),
+                transform_time(seg[1] / float(fps))
+            ]
+        })
+    return output
+
+
 @MODELS.register_module(
     Tasks.video_summarization, module_name=Models.video_summarization)
 class PGLVideoSummarization(TorchModel):
diff --git a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
index 89deb7ba..34eb0450 100644
--- a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
+++ b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
@@ -1,3 +1,6 @@
+# Part of the implementation is borrowed and modified from PGL-SUM,
+# publicly available at https://github.com/e-apostolidis/PGL-SUM
+
 import os
 
 import h5py
@@ -15,7 +18,7 @@ class VideoSummarizationDataset(TorchTaskDataset):
         self.mode = mode
         self.data_filename = os.path.join(root_dir, opt.dataset_file)
         self.split_filename = os.path.join(root_dir, opt.split_file)
-        self.split_index = opt.split_index  # it represents the current split (varies from 0 to 4)
+        self.split_index = opt.split_index
         hdf = h5py.File(self.data_filename, 'r')
         self.list_frame_features, self.list_gtscores = [], []
         self.list_user_summary = []
diff --git a/modelscope/outputs.py b/modelscope/outputs.py
index 331f4816..07a14191 100644
--- a/modelscope/outputs.py
+++ b/modelscope/outputs.py
@@ -337,6 +337,22 @@ TASK_OUTPUTS = {
         OutputKeys.SCENE_META_LIST
     ],
 
+    # video summarization result for a single video
+    # {
+    #   "output":
+    #     [
+    #       {
+    #           "frame": [start_frame, end_frame]
+    #           "timestamps": [start_time, end_time]
+    #       },
+    #       {
+    #           "frame": [start_frame, end_frame]
+    #           "timestamps": [start_time, end_time]
+    #       }
+    #     ]
+    # }
+    Tasks.video_summarization: [OutputKeys.OUTPUT],
+
     # ============ nlp tasks ===================
 
     # text classification result for single sample
diff --git a/modelscope/pipelines/cv/video_summarization_pipeline.py b/modelscope/pipelines/cv/video_summarization_pipeline.py
index 25ea1e7c..e4fe206d 100644
---
a/modelscope/pipelines/cv/video_summarization_pipeline.py +++ b/modelscope/pipelines/cv/video_summarization_pipeline.py @@ -1,4 +1,6 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. +# Part of the implementation is borrowed and modified from PGL-SUM, +# publicly available at https://github.com/e-apostolidis/PGL-SUM + import os.path as osp from typing import Any, Dict @@ -8,7 +10,8 @@ import torch from tqdm import tqdm from modelscope.metainfo import Pipelines -from modelscope.models.cv.video_summarization import PGLVideoSummarization +from modelscope.models.cv.video_summarization import (PGLVideoSummarization, + summary_format) from modelscope.models.cv.video_summarization.base_model import bvlc_googlenet from modelscope.models.cv.video_summarization.summarizer import ( generate_summary, get_change_points) @@ -57,6 +60,8 @@ class VideoSummarizationPipeline(Pipeline): frames = [] picks = [] cap = cv2.VideoCapture(input) + self.fps = cap.get(cv2.CAP_PROP_FPS) + self.frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) frame_idx = 0 while (cap.isOpened()): ret, frame = cap.read() @@ -89,7 +94,9 @@ class VideoSummarizationPipeline(Pipeline): summary = self.inference(frame_features, input['n_frame'], input['picks'], change_points) - return {OutputKeys.OUTPUT: summary} + output = summary_format(summary, self.fps) + + return {OutputKeys.OUTPUT: output} def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: return inputs diff --git a/tests/pipelines/test_video_summarization.py b/tests/pipelines/test_video_summarization.py index 6dcc31e9..1f965c53 100644 --- a/tests/pipelines/test_video_summarization.py +++ b/tests/pipelines/test_video_summarization.py @@ -3,7 +3,6 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -from modelscope.utils.cv.image_utils import show_video_summarization_result from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level @@ -22,8 
+21,6 @@ class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck): result = summarization_pipeline(video_path) print(f'video summarization output: \n{result}.') - show_video_summarization_result(video_path, result, - './summarization_result.avi') @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_modelhub_default_model(self):