[to #42322933]video summarization 添加 license & header; 修改 output for demo service

video summarization: 1. 添加 license & header; 2. 修改 output for demo service Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10260946
3 years ago · 4c99363804
--- a/modelscope/metrics/video_summarization_metric.py
+++ b/modelscope/metrics/video_summarization_metric.py
@@ -1,3 +1,6 @@
 # Part of the implementation is borrowed and modified from PGL-SUM,
 # publicly available at https://github.com/e-apostolidis/PGL-SUM

 from typing import Dict

 import numpy as np
--- a/modelscope/models/cv/video_summarization/init.py
+++ b/modelscope/models/cv/video_summarization/init.py
@@ -1 +1,22 @@
 from .summarizer import PGLVideoSummarization
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import TYPE_CHECKING

 from modelscope.utils.import_utils import LazyImportModule

 # typing.TYPE_CHECKING is False at runtime, so this eager import is only
 # seen by static type checkers and IDEs.
 if TYPE_CHECKING:
    from .summarizer import (PGLVideoSummarization, summary_format)

 else:
    # Maps each submodule of this package to the public names it provides.
    _import_structure = {
        'summarizer': ['PGLVideoSummarization', 'summary_format']
    }

    import sys

    # Replace this package's entry in sys.modules with a LazyImportModule
    # built from the mapping above.
    # NOTE(review): presumably this defers importing `summarizer` until one
    # of the listed names is first accessed -- confirm against
    # modelscope.utils.import_utils.LazyImportModule.
    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
--- a/modelscope/models/cv/video_summarization/base_model.py
+++ b/modelscope/models/cv/video_summarization/base_model.py
@@ -1,4 +1,5 @@
 # The implementation is based on pytorch-caffe-models, available at https://github.com/crowsonkb/pytorch-caffe-models.
 # Part of the implementation is borrowed and modified from pytorch-caffe-models,
 # publicly available at https://github.com/crowsonkb/pytorch-caffe-models

 import cv2
 import numpy as np
--- a/modelscope/models/cv/video_summarization/kts/cpd_auto.py
+++ b/modelscope/models/cv/video_summarization/kts/cpd_auto.py
@@ -1,4 +1,5 @@
 # The implementation is based on KTS, available at https://github.com/TatsuyaShirakawa/KTS.
 # Part of the implementation is borrowed and modified from KTS,
 # publicly available at https://github.com/TatsuyaShirakawa/KTS

 import numpy as np

--- a/modelscope/models/cv/video_summarization/kts/cpd_nonlin.py
+++ b/modelscope/models/cv/video_summarization/kts/cpd_nonlin.py
@@ -1,4 +1,5 @@
 # The implementation is based on KTS, available at https://github.com/TatsuyaShirakawa/KTS.
 # Part of the implementation is borrowed and modified from KTS,
 # publicly available at https://github.com/TatsuyaShirakawa/KTS

 import numpy as np

--- a/modelscope/models/cv/video_summarization/pgl_sum.py
+++ b/modelscope/models/cv/video_summarization/pgl_sum.py
@@ -1,4 +1,5 @@
 # The implementation is based on PGL-SUM, available at https://github.com/e-apostolidis/PGL-SUM.
 # Part of the implementation is borrowed and modified from PGL-SUM,
 # publicly available at https://github.com/e-apostolidis/PGL-SUM

 import math

--- a/modelscope/models/cv/video_summarization/summarizer.py
+++ b/modelscope/models/cv/video_summarization/summarizer.py
@@ -1,4 +1,5 @@
 # The implementation is based on PGL-SUM, available at https://github.com/e-apostolidis/PGL-SUM.
 # Part of the implementation is borrowed and modified from PGL-SUM,
 # publicly available at https://github.com/e-apostolidis/PGL-SUM

 import os.path as osp
 from copy import deepcopy
@@ -23,7 +24,8 @@ logger = get_logger()
 def get_change_points(video_feat, n_frame):
    video_feat = np.array(video_feat, np.float32)
    K = np.dot(video_feat, video_feat.T)
    change_points, _ = cpd_auto(K, ncp=120, vmax=2.2 / 4.0, lmin=1)
    change_points, _ = cpd_auto(
        K, ncp=min(K.shape[0] - 1, 120), vmax=2.2 / 4.0, lmin=1)
    change_points = change_points * 15
    change_points = np.concatenate(([0], change_points, [n_frame - 1]))

@@ -135,6 +137,46 @@ def generate_summary(all_shot_bound, all_scores, all_nframes, all_positions):
    return all_summaries


 def transform_time(seconds):
    """Format a duration given in seconds as an ``HH:MM:SS.mmm`` string.

    The value is rounded to whole milliseconds *before* it is split into
    hour/minute/second fields. Formatting the seconds field on its own
    (``%06.3f``) lets a value such as 59.9996 round up to ``60.000``
    without carrying into the minutes, yielding the invalid timestamp
    ``00:00:60.000``; rounding first produces ``00:01:00.000``.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        str: Zero-padded hours, minutes and seconds followed by a
        three-digit millisecond part, e.g. ``'01:01:01.500'``.
    """
    # int() guards against round() returning a float for non-builtin
    # numeric types.
    total_ms = int(round(seconds * 1000))
    total_s, ms = divmod(total_ms, 1000)
    m, s = divmod(total_s, 60)
    h, m = divmod(m, 60)
    return '%02d:%02d:%02d.%03d' % (h, m, s, ms)


 def summary_format(summary, fps):
    """Convert a per-frame selection mask into a list of summary segments.

    Args:
        summary: Sequence with one entry per frame; truthy entries mark
            frames that belong to the summary.
        fps: Frame rate used to convert frame indices into timestamps.

    Returns:
        list: One dict per run of consecutive selected frames, each with
        ``'frame'`` -> ``[start_frame, end_frame]`` (inclusive indices)
        and ``'timestamps'`` -> ``[start, end]`` as strings produced by
        ``transform_time``. Empty when no frame is selected.
    """
    frames_list = []
    start_frame = -1
    is_summary_frame = False
    for i, idx in enumerate(summary):
        if idx:
            if not is_summary_frame:
                # First selected frame of a new run.
                start_frame = i
                is_summary_frame = True
        elif is_summary_frame:
            # The run ended on the previous frame.
            frames_list.append([start_frame, i - 1])
            is_summary_frame = False

    # A run that is still open after the loop extends to the last frame.
    # The end index must come from `summary` itself; the previous code read
    # an undefined name here and raised NameError whenever the summary
    # ended on a selected frame.
    if is_summary_frame:
        frames_list.append([start_frame, len(summary) - 1])

    return [{
        'frame':
        seg,
        'timestamps': [
            transform_time(seg[0] / float(fps)),
            transform_time(seg[1] / float(fps))
        ]
    } for seg in frames_list]


@MODELS.register_module(
    Tasks.video_summarization, module_name=Models.video_summarization)
 class PGLVideoSummarization(TorchModel):
--- a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
+++ b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
@@ -1,3 +1,6 @@
 # Part of the implementation is borrowed and modified from PGL-SUM,
 # publicly available at https://github.com/e-apostolidis/PGL-SUM

 import os

 import h5py
@@ -15,7 +18,7 @@ class VideoSummarizationDataset(TorchTaskDataset):
        self.mode = mode
        self.data_filename = os.path.join(root_dir, opt.dataset_file)
        self.split_filename = os.path.join(root_dir, opt.split_file)
        self.split_index = opt.split_index  # it represents the current split (varies from 0 to 4)
        self.split_index = opt.split_index
        hdf = h5py.File(self.data_filename, 'r')
        self.list_frame_features, self.list_gtscores = [], []
        self.list_user_summary = []
--- a/modelscope/outputs.py
+++ b/modelscope/outputs.py
@@ -337,6 +337,22 @@ TASK_OUTPUTS = {
        OutputKeys.SCENE_META_LIST
    ],

    # video summarization result for a single video
    # {
    #        "output":
    #        [
    #           {
    #               "frame": [start_frame, end_frame],
    #               "timestamps": [start_time, end_time]
    #           },
    #           {
    #               "frame": [start_frame, end_frame],
    #               "timestamps": [start_time, end_time]
    #           }
    #        ]
    # }
    Tasks.video_summarization: [OutputKeys.OUTPUT],

    # ============ nlp tasks ===================

    # text classification result for single sample
--- a/modelscope/pipelines/cv/video_summarization_pipeline.py
+++ b/modelscope/pipelines/cv/video_summarization_pipeline.py
@@ -1,4 +1,6 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 # Part of the implementation is borrowed and modified from PGL-SUM,
 # publicly available at https://github.com/e-apostolidis/PGL-SUM

 import os.path as osp
 from typing import Any, Dict

@@ -8,7 +10,8 @@ import torch
 from tqdm import tqdm

 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.video_summarization import PGLVideoSummarization
 from modelscope.models.cv.video_summarization import (PGLVideoSummarization,
                                                      summary_format)
 from modelscope.models.cv.video_summarization.base_model import bvlc_googlenet
 from modelscope.models.cv.video_summarization.summarizer import (
    generate_summary, get_change_points)
@@ -57,6 +60,8 @@ class VideoSummarizationPipeline(Pipeline):
        frames = []
        picks = []
        cap = cv2.VideoCapture(input)
        self.fps = cap.get(cv2.CAP_PROP_FPS)
        self.frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        frame_idx = 0
        while (cap.isOpened()):
            ret, frame = cap.read()
@@ -89,7 +94,9 @@ class VideoSummarizationPipeline(Pipeline):
        summary = self.inference(frame_features, input['n_frame'],
                                 input['picks'], change_points)

        return {OutputKeys.OUTPUT: summary}
        output = summary_format(summary, self.fps)

        return {OutputKeys.OUTPUT: output}

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return the forward results unchanged.

        Args:
            inputs: Result dict handed over by the previous pipeline stage.

        Returns:
            The very same dict object; no post-processing is applied.
        """
        return inputs
--- a/tests/pipelines/test_video_summarization.py
+++ b/tests/pipelines/test_video_summarization.py
@@ -3,7 +3,6 @@ import unittest

 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 from modelscope.utils.cv.image_utils import show_video_summarization_result
 from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level

@@ -22,8 +21,6 @@ class VideoSummarizationTest(unittest.TestCase, DemoCompatibilityCheck):
        result = summarization_pipeline(video_path)

        print(f'video summarization output: \n{result}.')
        show_video_summarization_result(video_path, result,
                                        './summarization_result.avi')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_modelhub_default_model(self):