|
- # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
- import numpy as np
- import pycocotools.mask as mask_util
-
- from detectron2.utils.visualizer import (
- ColorMode,
- Visualizer,
- _create_text_labels,
- _PanopticPrediction,
- )
-
- from .colormap import random_color
-
-
- class _DetectedInstance:
- """
- Used to store data about detected objects in video frame,
- in order to transfer color to objects in the future frames.
-
- Attributes:
- label (int):
- bbox (tuple[float]):
- mask_rle (dict):
- color (tuple[float]): RGB colors in range (0, 1)
- ttl (int): time-to-live for the instance. For example, if ttl=2,
- the instance color can be transferred to objects in the next two frames.
- """
-
- __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"]
-
- def __init__(self, label, bbox, mask_rle, color, ttl):
- self.label = label
- self.bbox = bbox
- self.mask_rle = mask_rle
- self.color = color
- self.ttl = ttl
-
-
class VideoVisualizer:
    """
    Draw detection / segmentation results on video frames, keeping instance
    colors consistent across frames via a naive IoU-matching heuristic
    (see :meth:`_assign_colors`).
    """

    def __init__(self, metadata, instance_mode=ColorMode.IMAGE):
        """
        Args:
            metadata (MetadataCatalog): image metadata.
            instance_mode (ColorMode): either ColorMode.IMAGE (draw on the
                color frame) or ColorMode.IMAGE_BW (gray out regions without
                predictions). Other modes are not supported.
        """
        self.metadata = metadata
        # Instances seen in recent frames, kept to transfer colors forward.
        self._old_instances = []
        assert instance_mode in [
            ColorMode.IMAGE,
            ColorMode.IMAGE_BW,
        ], "Other mode not supported yet."
        self._instance_mode = instance_mode

    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. Following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            # Nothing to draw; still return the (unmodified) frame image.
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # mask IOU is not yet enabled
            # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
            # assert len(masks_rles) == num_instances
        else:
            masks = None

        # Match against previous frames (by box IoU) so the same object keeps
        # the same color over time.
        detected = [
            _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8)
            for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None
            )
            # Lower alpha so the colored masks stand out on the gray frame.
            alpha = 0.3
        else:
            alpha = 0.5

        frame_visualizer.overlay_instances(
            boxes=None if masks is not None else boxes,  # boxes are a bit distracting
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output

    def draw_sem_seg(self, frame, sem_seg, area_threshold=None):
        """
        Draw semantic segmentation on a frame.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W),
                each value is the integer label.
            area_threshold (Optional[int]): only draw segmentations larger than the threshold

        Returns:
            output (VisImage): image object with visualizations.
        """
        # don't need to do anything special
        frame_visualizer = Visualizer(frame, self.metadata)
        # BUGFIX: was hard-coded to area_threshold=None, silently ignoring
        # the caller-supplied threshold.
        frame_visualizer.draw_sem_seg(sem_seg, area_threshold=area_threshold)
        return frame_visualizer.output

    def draw_panoptic_seg_predictions(
        self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5
    ):
        """
        Draw panoptic prediction results on a frame: "stuff" regions first,
        then "thing" instances (with cross-frame color tracking by mask IoU).

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            panoptic_seg (Tensor): of shape (H, W), where each value is the
                segment id of the pixel.
            segments_info (list[dict]): metadata for each segment id.
            area_threshold (Optional[int]): only draw segmentations larger than the threshold.
            alpha (float): mask transparency.

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        pred = _PanopticPrediction(panoptic_seg, segments_info)

        if self._instance_mode == ColorMode.IMAGE_BW:
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                pred.non_empty_mask()
            )

        # draw mask for all semantic segments first i.e. "stuff"
        for mask, sinfo in pred.semantic_masks():
            category_idx = sinfo["category_id"]
            try:
                mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
            except AttributeError:
                # Metadata has no stuff_colors; let the visualizer pick one.
                mask_color = None

            frame_visualizer.draw_binary_mask(
                mask,
                color=mask_color,
                text=self.metadata.stuff_classes[category_idx],
                alpha=alpha,
                area_threshold=area_threshold,
            )

        all_instances = list(pred.instance_masks())
        if len(all_instances) == 0:
            return frame_visualizer.output
        # draw mask for all instances second
        masks, sinfo = list(zip(*all_instances))
        num_instances = len(masks)
        # RLE-encode the masks (pycocotools expects HxWxN uint8, Fortran order)
        # so _assign_colors can compute mask IoU against previous frames.
        masks_rles = mask_util.encode(
            np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F")
        )
        assert len(masks_rles) == num_instances

        category_ids = [x["category_id"] for x in sinfo]
        detected = [
            _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8)
            for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)
        labels = [self.metadata.thing_classes[k] for k in category_ids]

        frame_visualizer.overlay_instances(
            boxes=None,
            masks=masks,
            labels=labels,
            keypoints=None,
            assigned_colors=colors,
            alpha=alpha,
        )
        return frame_visualizer.output

    def _assign_colors(self, instances):
        """
        Naive tracking heuristics to assign same color to the same instance,
        will update the internal state of tracked instances.

        Args:
            instances (list[_DetectedInstance]): instances detected in the
                current frame; their ``color`` fields are filled in-place.

        Returns:
            list[tuple[float]]: list of colors.
        """

        # Compute iou with either boxes or masks:
        # BUGFIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin bool is the documented replacement.
        is_crowd = np.zeros((len(instances),), dtype=bool)
        if instances[0].bbox is None:
            assert instances[0].mask_rle is not None
            # use mask iou only when box iou is None
            # because box seems good enough
            rles_old = [x.mask_rle for x in self._old_instances]
            rles_new = [x.mask_rle for x in instances]
            ious = mask_util.iou(rles_old, rles_new, is_crowd)
            threshold = 0.5
        else:
            boxes_old = [x.bbox for x in self._old_instances]
            boxes_new = [x.bbox for x in instances]
            ious = mask_util.iou(boxes_old, boxes_new, is_crowd)
            threshold = 0.6
        if len(ious) == 0:
            # No previous instances: empty IoU matrix with the right shape.
            ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32")

        # Only allow matching instances of the same label:
        for old_idx, old in enumerate(self._old_instances):
            for new_idx, new in enumerate(instances):
                if old.label != new.label:
                    ious[old_idx, new_idx] = 0

        matched_new_per_old = np.asarray(ious).argmax(axis=1)
        max_iou_per_old = np.asarray(ious).max(axis=1)

        # Try to find match for each old instance:
        extra_instances = []
        for idx, inst in enumerate(self._old_instances):
            if max_iou_per_old[idx] > threshold:
                newidx = matched_new_per_old[idx]
                if instances[newidx].color is None:
                    instances[newidx].color = inst.color
                continue
            # If an old instance does not match any new instances,
            # keep it for the next frame in case it is just missed by the detector
            inst.ttl -= 1
            if inst.ttl > 0:
                extra_instances.append(inst)

        # Assign random color to newly-detected instances:
        for inst in instances:
            if inst.color is None:
                inst.color = random_color(rgb=True, maximum=1)
        self._old_instances = instances[:] + extra_instances
        return [d.color for d in instances]
|