diff --git a/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py b/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py
index fc7339b3..3830fb42 100644
--- a/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py
+++ b/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py
@@ -16,6 +16,7 @@ from modelscope.models.builder import MODELS
 from modelscope.preprocessors import LoadImage
 from modelscope.utils.config import Config
 from modelscope.utils.constant import ModelFile, Tasks
+from .utils import timestamp_format
 from .yolox.data.data_augment import ValTransform
 from .yolox.exp import get_exp_by_name
 from .yolox.utils import postprocess
@@ -99,14 +100,17 @@ class RealtimeVideoDetector(TorchModel):
 
     def inference_video(self, v_path):
         outputs = []
         desc = 'Detecting video: {}'.format(v_path)
-        for frame, result in tqdm(
-                self.inference_video_iter(v_path), desc=desc):
+        for frame_idx, (frame, result) in enumerate(
+                tqdm(self.inference_video_iter(v_path), desc=desc)):
+            result = result + (timestamp_format(seconds=frame_idx
+                                                / self.fps), )
             outputs.append(result)
         return outputs
 
     def inference_video_iter(self, v_path):
         capture = cv2.VideoCapture(v_path)
+        self.fps = capture.get(cv2.CAP_PROP_FPS)
         while capture.isOpened():
             ret, frame = capture.read()
             if not ret:
diff --git a/modelscope/models/cv/realtime_object_detection/utils.py b/modelscope/models/cv/realtime_object_detection/utils.py
new file mode 100644
index 00000000..c3d7a4c6
--- /dev/null
+++ b/modelscope/models/cv/realtime_object_detection/utils.py
@@ -0,0 +1,9 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import math
+
+
+def timestamp_format(seconds):
+    m, s = divmod(seconds, 60)
+    h, m = divmod(m, 60)
+    time = '%02d:%02d:%06.3f' % (h, m, s)
+    return time
diff --git a/modelscope/pipelines/cv/realtime_video_object_detection_pipeline.py b/modelscope/pipelines/cv/realtime_video_object_detection_pipeline.py
index 3686c50a..073fad66 100644
--- a/modelscope/pipelines/cv/realtime_video_object_detection_pipeline.py
+++ b/modelscope/pipelines/cv/realtime_video_object_detection_pipeline.py
@@ -45,15 +45,17 @@ class RealtimeVideoObjectDetectionPipeline(Pipeline):
                     **kwargs) -> str:
         forward_output = input['forward_output']
 
-        scores, boxes, labels = [], [], []
+        scores, boxes, labels, timestamps = [], [], [], []
         for result in forward_output:
-            box, score, label = result
+            box, score, label, timestamp = result
             scores.append(score)
             boxes.append(box)
             labels.append(label)
+            timestamps.append(timestamp)
 
         return {
             OutputKeys.BOXES: boxes,
             OutputKeys.SCORES: scores,
             OutputKeys.LABELS: labels,
+            OutputKeys.TIMESTAMPS: timestamps,
         }
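
A minimal sketch (not part of the diff) of how the new timestamp_format helper behaves; the import path below simply mirrors the location of the added utils.py:

    from modelscope.models.cv.realtime_object_detection.utils import timestamp_format

    # Frame 3765 of a 30-fps video sits 125.5 s into the stream,
    # matching the frame_idx / self.fps computation in inference_video.
    print(timestamp_format(seconds=3765 / 30))  # -> '00:02:05.500'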