You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

visualizer.py 45 kB

3 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125
  1. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
  2. import colorsys
  3. import logging
  4. import math
  5. import numpy as np
  6. from enum import Enum, unique
  7. import cv2
  8. import matplotlib as mpl
  9. import matplotlib.colors as mplc
  10. import matplotlib.figure as mplfigure
  11. import pycocotools.mask as mask_util
  12. import torch
  13. from matplotlib.backends.backend_agg import FigureCanvasAgg
  14. from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes
  15. from .colormap import random_color
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Public names exported by this module.
__all__ = ["ColorMode", "VisImage", "Visualizer"]

# Box-area threshold (multiplied by the output scale at the point of use) below which
# `Visualizer.overlay_instances` moves an instance's label to the side of its box.
_SMALL_OBJECT_AREA_THRESH = 1000
# NOTE(review): not referenced in the part of the file visible here; presumably an
# area threshold (in pixels) for treating a mask as "large" -- confirm against its users.
_LARGE_MASK_AREA_THRESH = 120000

# Colors as RGB tuples with components in [0, 1].
_OFF_WHITE = (1.0, 1.0, 240.0 / 255)  # used as the mask edge color for stuff segments
_BLACK = (0, 0, 0)
_RED = (1.0, 0, 0)  # used for keypoint circles and the nose/shoulder/hip lines

# A keypoint is drawn only when its third component (score/visibility) exceeds this value.
_KEYPOINT_THRESHOLD = 0.05
@unique
class ColorMode(Enum):
    """
    Enum of different color modes to use for instance visualizations.

    Attributes:
        IMAGE: Pick a random color for every instance and overlay segmentations with
            low opacity.
        SEGMENTATION: Let instances of the same category have similar colors, and overlay
            them with high opacity. This draws more attention to the quality of the
            segmentation.
        IMAGE_BW: Same as IMAGE, but convert all areas without masks to gray-scale.
            Only available for drawing per-instance mask predictions.
    """

    IMAGE = 0
    SEGMENTATION = 1
    IMAGE_BW = 2
  38. class GenericMask:
  39. """
  40. Attribute:
  41. polygons (list[ndarray]): list[ndarray]: polygons for this mask.
  42. Each ndarray has format [x, y, x, y, ...]
  43. mask (ndarray): a binary mask
  44. """
  45. def __init__(self, mask_or_polygons, height, width):
  46. self._mask = self._polygons = self._has_holes = None
  47. self.height = height
  48. self.width = width
  49. m = mask_or_polygons
  50. if isinstance(m, dict):
  51. # RLEs
  52. assert "counts" in m and "size" in m
  53. if isinstance(m["counts"], list): # uncompressed RLEs
  54. h, w = m["size"]
  55. assert h == height and w == width
  56. m = mask_util.frPyObjects(m, h, w)
  57. self._mask = mask_util.decode(m)[:, :]
  58. return
  59. if isinstance(m, list): # list[ndarray]
  60. self._polygons = [np.asarray(x).reshape(-1) for x in m]
  61. return
  62. if isinstance(m, np.ndarray): # assumed to be a binary mask
  63. assert m.shape[1] != 2, m.shape
  64. assert m.shape == (height, width), m.shape
  65. self._mask = m.astype("uint8")
  66. return
  67. raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m)))
  68. @property
  69. def mask(self):
  70. if self._mask is None:
  71. self._mask = self.polygons_to_mask(self._polygons)
  72. return self._mask
  73. @property
  74. def polygons(self):
  75. if self._polygons is None:
  76. self._polygons, self._has_holes = self.mask_to_polygons(self._mask)
  77. return self._polygons
  78. @property
  79. def has_holes(self):
  80. if self._has_holes is None:
  81. if self._mask is not None:
  82. self._polygons, self._has_holes = self.mask_to_polygons(self._mask)
  83. else:
  84. self._has_holes = False # if original format is polygon, does not have holes
  85. return self._has_holes
  86. def mask_to_polygons(self, mask):
  87. # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level
  88. # hierarchy. External contours (boundary) of the object are placed in hierarchy-1.
  89. # Internal contours (holes) are placed in hierarchy-2.
  90. # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours.
  91. mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr
  92. res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
  93. hierarchy = res[-1]
  94. if hierarchy is None: # empty mask
  95. return [], False
  96. has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0
  97. res = res[-2]
  98. res = [x.flatten() for x in res]
  99. res = [x for x in res if len(x) >= 6]
  100. return res, has_holes
  101. def polygons_to_mask(self, polygons):
  102. rle = mask_util.frPyObjects(polygons, self.height, self.width)
  103. rle = mask_util.merge(rle)
  104. return mask_util.decode(rle)[:, :]
  105. def area(self):
  106. return self.mask.sum()
  107. def bbox(self):
  108. p = mask_util.frPyObjects(self.polygons, self.height, self.width)
  109. p = mask_util.merge(p)
  110. bbox = mask_util.toBbox(p)
  111. bbox[2] += bbox[0]
  112. bbox[3] += bbox[1]
  113. return bbox
  114. class _PanopticPrediction:
  115. def __init__(self, panoptic_seg, segments_info):
  116. self._seg = panoptic_seg
  117. self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info
  118. segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True)
  119. areas = areas.numpy()
  120. sorted_idxs = np.argsort(-areas)
  121. self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs]
  122. self._seg_ids = self._seg_ids.tolist()
  123. for sid, area in zip(self._seg_ids, self._seg_areas):
  124. if sid in self._sinfo:
  125. self._sinfo[sid]["area"] = float(area)
  126. def non_empty_mask(self):
  127. """
  128. Returns:
  129. (H, W) array, a mask for all pixels that have a prediction
  130. """
  131. empty_ids = []
  132. for id in self._seg_ids:
  133. if id not in self._sinfo:
  134. empty_ids.append(id)
  135. if len(empty_ids) == 0:
  136. return np.zeros(self._seg.shape, dtype=np.uint8)
  137. assert (
  138. len(empty_ids) == 1
  139. ), ">1 ids corresponds to no labels. This is currently not supported"
  140. return (self._seg != empty_ids[0]).numpy().astype(np.bool)
  141. def semantic_masks(self):
  142. for sid in self._seg_ids:
  143. sinfo = self._sinfo.get(sid)
  144. if sinfo is None or sinfo["isthing"]:
  145. # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions.
  146. continue
  147. yield (self._seg == sid).numpy().astype(np.bool), sinfo
  148. def instance_masks(self):
  149. for sid in self._seg_ids:
  150. sinfo = self._sinfo.get(sid)
  151. if sinfo is None or not sinfo["isthing"]:
  152. continue
  153. mask = (self._seg == sid).numpy().astype(np.bool)
  154. if mask.sum() > 0:
  155. yield mask, sinfo
  156. def _create_text_labels(classes, scores, class_names):
  157. """
  158. Args:
  159. classes (list[int] or None):
  160. scores (list[float] or None):
  161. class_names (list[str] or None):
  162. Returns:
  163. list[str] or None
  164. """
  165. labels = None
  166. if classes is not None and class_names is not None and len(class_names) > 1:
  167. labels = [class_names[i] for i in classes]
  168. if scores is not None:
  169. if labels is None:
  170. labels = ["{:.0f}%".format(s * 100) for s in scores]
  171. else:
  172. labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)]
  173. return labels
  174. class VisImage:
  175. def __init__(self, img, scale=1.0):
  176. """
  177. Args:
  178. img (ndarray): an RGB image of shape (H, W, 3).
  179. scale (float): scale the input image
  180. """
  181. self.img = img
  182. self.scale = scale
  183. self.width, self.height = img.shape[1], img.shape[0]
  184. self._setup_figure(img)
  185. def _setup_figure(self, img):
  186. """
  187. Args:
  188. Same as in :meth:`__init__()`.
  189. Returns:
  190. fig (matplotlib.pyplot.figure): top level container for all the image plot elements.
  191. ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system.
  192. """
  193. fig = mplfigure.Figure(frameon=False)
  194. self.dpi = fig.get_dpi()
  195. # add a small 1e-2 to avoid precision lost due to matplotlib's truncation
  196. # (https://github.com/matplotlib/matplotlib/issues/15363)
  197. fig.set_size_inches(
  198. (self.width * self.scale + 1e-2) / self.dpi,
  199. (self.height * self.scale + 1e-2) / self.dpi,
  200. )
  201. self.canvas = FigureCanvasAgg(fig)
  202. # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig)
  203. ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
  204. ax.axis("off")
  205. ax.set_xlim(0.0, self.width)
  206. ax.set_ylim(self.height)
  207. self.fig = fig
  208. self.ax = ax
  209. def save(self, filepath):
  210. """
  211. Args:
  212. filepath (str): a string that contains the absolute path, including the file name, where
  213. the visualized image will be saved.
  214. """
  215. if filepath.lower().endswith(".jpg") or filepath.lower().endswith(".png"):
  216. # faster than matplotlib's imshow
  217. cv2.imwrite(filepath, self.get_image()[:, :, ::-1])
  218. else:
  219. # support general formats (e.g. pdf)
  220. self.ax.imshow(self.img, interpolation="nearest")
  221. self.fig.savefig(filepath)
  222. def get_image(self):
  223. """
  224. Returns:
  225. ndarray: the visualized image of shape (H, W, 3) (RGB) in uint8 type.
  226. The shape is scaled w.r.t the input image using the given `scale` argument.
  227. """
  228. canvas = self.canvas
  229. s, (width, height) = canvas.print_to_buffer()
  230. if (self.width, self.height) != (width, height):
  231. img = cv2.resize(self.img, (width, height))
  232. else:
  233. img = self.img
  234. # buf = io.BytesIO() # works for cairo backend
  235. # canvas.print_rgba(buf)
  236. # width, height = self.width, self.height
  237. # s = buf.getvalue()
  238. buffer = np.frombuffer(s, dtype="uint8")
  239. # imshow is slow. blend manually (still quite slow)
  240. img_rgba = buffer.reshape(height, width, 4)
  241. rgb, alpha = np.split(img_rgba, [3], axis=2)
  242. try:
  243. import numexpr as ne # fuse them with numexpr
  244. visualized_image = ne.evaluate("img * (1 - alpha / 255.0) + rgb * (alpha / 255.0)")
  245. except ImportError:
  246. alpha = alpha.astype("float32") / 255.0
  247. visualized_image = img * (1 - alpha) + rgb * alpha
  248. visualized_image = visualized_image.astype("uint8")
  249. return visualized_image
  250. class Visualizer:
  251. def __init__(self, img_rgb, metadata, scale=1.0, instance_mode=ColorMode.IMAGE):
  252. """
  253. Args:
  254. img_rgb: a numpy array of shape (H, W, C), where H and W correspond to
  255. the height and width of the image respectively. C is the number of
  256. color channels. The image is required to be in RGB format since that
  257. is a requirement of the Matplotlib library. The image is also expected
  258. to be in the range [0, 255].
  259. metadata (MetadataCatalog): image metadata.
  260. """
  261. self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
  262. self.metadata = metadata
  263. self.output = VisImage(self.img, scale=scale)
  264. self.cpu_device = torch.device("cpu")
  265. # too small texts are useless, therefore clamp to 9
  266. self._default_font_size = max(
  267. np.sqrt(self.output.height * self.output.width) // 90, 10 // scale
  268. )
  269. self._instance_mode = instance_mode
  270. def draw_instance_predictions(self, predictions):
  271. """
  272. Draw instance-level prediction results on an image.
  273. Args:
  274. predictions (Instances): the output of an instance detection/segmentation
  275. model. Following fields will be used to draw:
  276. "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").
  277. Returns:
  278. output (VisImage): image object with visualizations.
  279. """
  280. boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
  281. scores = predictions.scores if predictions.has("scores") else None
  282. classes = predictions.pred_classes if predictions.has("pred_classes") else None
  283. labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))
  284. keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None
  285. if predictions.has("pred_masks"):
  286. masks = np.asarray(predictions.pred_masks)
  287. masks = [GenericMask(x, self.output.height, self.output.width) for x in masks]
  288. else:
  289. masks = None
  290. if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"):
  291. colors = [
  292. self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes
  293. ]
  294. alpha = 0.8
  295. else:
  296. colors = None
  297. alpha = 0.5
  298. if self._instance_mode == ColorMode.IMAGE_BW:
  299. assert predictions.has("pred_masks"), "ColorMode.IMAGE_BW requires segmentations"
  300. self.output.img = self._create_grayscale_image(
  301. (predictions.pred_masks.any(dim=0) > 0).numpy()
  302. )
  303. alpha = 0.3
  304. self.overlay_instances(
  305. masks=masks,
  306. boxes=boxes,
  307. labels=labels,
  308. keypoints=keypoints,
  309. assigned_colors=colors,
  310. alpha=alpha,
  311. )
  312. return self.output
  313. def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8):
  314. """
  315. Draw semantic segmentation predictions/labels.
  316. Args:
  317. sem_seg (Tensor or ndarray): the segmentation of shape (H, W).
  318. area_threshold (int): segments with less than `area_threshold` are not drawn.
  319. alpha (float): the larger it is, the more opaque the segmentations are.
  320. Returns:
  321. output (VisImage): image object with visualizations.
  322. """
  323. if isinstance(sem_seg, torch.Tensor):
  324. sem_seg = sem_seg.numpy()
  325. labels, areas = np.unique(sem_seg, return_counts=True)
  326. sorted_idxs = np.argsort(-areas).tolist()
  327. labels = labels[sorted_idxs]
  328. for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels):
  329. try:
  330. mask_color = [x / 255 for x in self.metadata.stuff_colors[label]]
  331. except (AttributeError, IndexError):
  332. mask_color = None
  333. binary_mask = (sem_seg == label).astype(np.uint8)
  334. text = self.metadata.stuff_classes[label]
  335. self.draw_binary_mask(
  336. binary_mask,
  337. color=mask_color,
  338. edge_color=_OFF_WHITE,
  339. text=text,
  340. alpha=alpha,
  341. area_threshold=area_threshold,
  342. )
  343. return self.output
  344. def draw_panoptic_seg_predictions(
  345. self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7
  346. ):
  347. """
  348. Draw panoptic prediction results on an image.
  349. Args:
  350. panoptic_seg (Tensor): of shape (height, width) where the values are ids for each
  351. segment.
  352. segments_info (list[dict]): Describe each segment in `panoptic_seg`.
  353. Each dict contains keys "id", "category_id", "isthing".
  354. area_threshold (int): stuff segments with less than `area_threshold` are not drawn.
  355. Returns:
  356. output (VisImage): image object with visualizations.
  357. """
  358. pred = _PanopticPrediction(panoptic_seg, segments_info)
  359. if self._instance_mode == ColorMode.IMAGE_BW:
  360. self.output.img = self._create_grayscale_image(pred.non_empty_mask())
  361. # draw mask for all semantic segments first i.e. "stuff"
  362. for mask, sinfo in pred.semantic_masks():
  363. category_idx = sinfo["category_id"]
  364. try:
  365. mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
  366. except AttributeError:
  367. mask_color = None
  368. text = self.metadata.stuff_classes[category_idx]
  369. self.draw_binary_mask(
  370. mask,
  371. color=mask_color,
  372. edge_color=_OFF_WHITE,
  373. text=text,
  374. alpha=alpha,
  375. area_threshold=area_threshold,
  376. )
  377. # draw mask for all instances second
  378. all_instances = list(pred.instance_masks())
  379. if len(all_instances) == 0:
  380. return self.output
  381. masks, sinfo = list(zip(*all_instances))
  382. category_ids = [x["category_id"] for x in sinfo]
  383. try:
  384. scores = [x["score"] for x in sinfo]
  385. except KeyError:
  386. scores = None
  387. labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes)
  388. try:
  389. colors = [random_color(rgb=True, maximum=1) for k in category_ids]
  390. except AttributeError:
  391. colors = None
  392. self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha)
  393. return self.output
  394. def draw_dataset_dict(self, dic):
  395. """
  396. Draw annotations/segmentaions in Detectron2 Dataset format.
  397. Args:
  398. dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.
  399. Returns:
  400. output (VisImage): image object with visualizations.
  401. """
  402. annos = dic.get("annotations", None)
  403. if annos:
  404. if "segmentation" in annos[0]:
  405. masks = [x["segmentation"] for x in annos]
  406. else:
  407. masks = None
  408. if "keypoints" in annos[0]:
  409. keypts = [x["keypoints"] for x in annos]
  410. keypts = np.array(keypts).reshape(len(annos), -1, 3)
  411. else:
  412. keypts = None
  413. boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos]
  414. labels = [x["category_id"] for x in annos]
  415. names = self.metadata.get("thing_classes", None)
  416. if names:
  417. labels = [names[i] for i in labels]
  418. labels = [
  419. "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
  420. for i, a in zip(labels, annos)
  421. ]
  422. self.overlay_instances(labels=labels, boxes=boxes, masks=masks, keypoints=keypts)
  423. sem_seg = dic.get("sem_seg", None)
  424. if sem_seg is None and "sem_seg_file_name" in dic:
  425. sem_seg = cv2.imread(dic["sem_seg_file_name"], cv2.IMREAD_GRAYSCALE)
  426. if sem_seg is not None:
  427. self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
  428. return self.output
    def overlay_instances(
        self,
        *,
        boxes=None,
        labels=None,
        masks=None,
        keypoints=None,
        assigned_colors=None,
        alpha=0.5
    ):
        """
        Draw boxes, masks, labels and keypoints for a set of instances on the output image.

        Args:
            boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`,
                or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image,
                or a :class:`RotatedBoxes`,
                or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format
                for the N objects in a single image,
            labels (list[str]): the text to be displayed for each instance.
            masks (masks-like object): Supported types are:

                * `structures.masks.PolygonMasks`, `structures.masks.BitMasks`.
                * list[list[ndarray]]: contains the segmentation masks for all objects in one image.
                    The first level of the list corresponds to individual instances. The second
                    level to all the polygon that compose the instance, and the third level
                    to the polygon coordinates. The third level should have the format of
                    [x0, y0, x1, y1, ..., xn, yn] (n >= 3).
                * list[ndarray]: each ndarray is a binary mask of shape (H, W).
                * list[dict]: each dict is a COCO-style RLE.
            keypoints (Keypoint or array like): an array-like object of shape (N, K, 3),
                where the N is the number of instances and K is the number of keypoints.
                The last dimension corresponds to (x, y, visibility or score).
            assigned_colors (list[matplotlib.colors]): a list of colors, where each color
                corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
                for full list of formats that the colors are accepted in. When None, a
                random color is picked for every instance.
            alpha (float): opacity passed on when drawing the mask polygons.

        Returns:
            output (VisImage): image object with visualizations.
        """
        # The instance count comes from the first of boxes / masks / keypoints that is
        # given; the later ones are asserted to have the same length.
        # NOTE(review): if none of the three is given, num_instances stays None and
        # range(None) below raises TypeError -- confirm callers always pass at least one.
        num_instances = None
        if boxes is not None:
            boxes = self._convert_boxes(boxes)
            num_instances = len(boxes)
        if masks is not None:
            masks = self._convert_masks(masks)
            if num_instances:
                assert len(masks) == num_instances
            else:
                num_instances = len(masks)
        if keypoints is not None:
            if num_instances:
                assert len(keypoints) == num_instances
            else:
                num_instances = len(keypoints)
            keypoints = self._convert_keypoints(keypoints)
        if labels is not None:
            assert len(labels) == num_instances
        if assigned_colors is None:
            assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]
        if num_instances == 0:
            return self.output
        # 5-column boxes are rotated boxes. That path only receives boxes, labels and
        # colors: masks, keypoints and alpha are not forwarded.
        if boxes is not None and boxes.shape[1] == 5:
            return self.overlay_rotated_instances(
                boxes=boxes, labels=labels, assigned_colors=assigned_colors
            )

        # Display in largest to smallest order to reduce occlusion.
        # Box areas take precedence; mask areas are used only when no boxes are given.
        areas = None
        if boxes is not None:
            areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
        elif masks is not None:
            areas = np.asarray([x.area() for x in masks])

        if areas is not None:
            sorted_idxs = np.argsort(-areas).tolist()
            # Re-order overlapped instances in descending order.
            boxes = boxes[sorted_idxs] if boxes is not None else None
            labels = [labels[k] for k in sorted_idxs] if labels is not None else None
            masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None
            assigned_colors = [assigned_colors[idx] for idx in sorted_idxs]
            keypoints = keypoints[sorted_idxs] if keypoints is not None else None

        for i in range(num_instances):
            color = assigned_colors[i]
            if boxes is not None:
                self.draw_box(boxes[i], edge_color=color)

            if masks is not None:
                for segment in masks[i].polygons:
                    self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha)

            if labels is not None:
                # first get a box
                if boxes is not None:
                    x0, y0, x1, y1 = boxes[i]
                    text_pos = (x0, y0)  # if drawing boxes, put text on the box corner.
                    horiz_align = "left"
                elif masks is not None:
                    x0, y0, x1, y1 = masks[i].bbox()

                    # draw text in the center (defined by median) when box is not drawn
                    # median is less sensitive to outliers.
                    # nonzero() yields (rows, cols); [::-1] turns the medians into (x, y).
                    text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1]
                    horiz_align = "center"
                else:
                    continue  # drawing the box confidence for keypoints isn't very useful.
                # for small objects, draw text at the side to avoid occlusion
                instance_area = (y1 - y0) * (x1 - x0)
                if (
                    instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale
                    or y1 - y0 < 40 * self.output.scale
                ):
                    # Within 5 pixels of the bottom edge the label goes to the top-right
                    # corner of the box, otherwise to its bottom-left corner.
                    if y1 >= self.output.height - 5:
                        text_pos = (x1, y0)
                    else:
                        text_pos = (x0, y1)

                # Font size grows with the instance height relative to the image size,
                # clipped to between 0.6x and 1x of the default font size.
                height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width)
                lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
                font_size = (
                    np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
                    * 0.5
                    * self._default_font_size
                )
                self.draw_text(
                    labels[i],
                    text_pos,
                    color=lighter_color,
                    horizontal_alignment=horiz_align,
                    font_size=font_size,
                )

        # draw keypoints
        if keypoints is not None:
            for keypoints_per_instance in keypoints:
                self.draw_and_connect_keypoints(keypoints_per_instance)

        return self.output
  555. def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None):
  556. """
  557. Args:
  558. boxes (ndarray): an Nx5 numpy array of
  559. (x_center, y_center, width, height, angle_degrees) format
  560. for the N objects in a single image.
  561. labels (list[str]): the text to be displayed for each instance.
  562. assigned_colors (list[matplotlib.colors]): a list of colors, where each color
  563. corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
  564. for full list of formats that the colors are accepted in.
  565. Returns:
  566. output (VisImage): image object with visualizations.
  567. """
  568. num_instances = len(boxes)
  569. if assigned_colors is None:
  570. assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]
  571. if num_instances == 0:
  572. return self.output
  573. # Display in largest to smallest order to reduce occlusion.
  574. if boxes is not None:
  575. areas = boxes[:, 2] * boxes[:, 3]
  576. sorted_idxs = np.argsort(-areas).tolist()
  577. # Re-order overlapped instances in descending order.
  578. boxes = boxes[sorted_idxs]
  579. labels = [labels[k] for k in sorted_idxs] if labels is not None else None
  580. colors = [assigned_colors[idx] for idx in sorted_idxs]
  581. for i in range(num_instances):
  582. self.draw_rotated_box_with_label(
  583. boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None
  584. )
  585. return self.output
  586. def draw_and_connect_keypoints(self, keypoints):
  587. """
  588. Draws keypoints of an instance and follows the rules for keypoint connections
  589. to draw lines between appropriate keypoints. This follows color heuristics for
  590. line color.
  591. Args:
  592. keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints
  593. and the last dimension corresponds to (x, y, probability).
  594. Returns:
  595. output (VisImage): image object with visualizations.
  596. """
  597. visible = {}
  598. keypoint_names = self.metadata.get("keypoint_names")
  599. for idx, keypoint in enumerate(keypoints):
  600. # draw keypoint
  601. x, y, prob = keypoint
  602. if prob > _KEYPOINT_THRESHOLD:
  603. self.draw_circle((x, y), color=_RED)
  604. if keypoint_names:
  605. keypoint_name = keypoint_names[idx]
  606. visible[keypoint_name] = (x, y)
  607. if self.metadata.get("keypoint_connection_rules"):
  608. for kp0, kp1, color in self.metadata.keypoint_connection_rules:
  609. if kp0 in visible and kp1 in visible:
  610. x0, y0 = visible[kp0]
  611. x1, y1 = visible[kp1]
  612. color = tuple(x / 255.0 for x in color)
  613. self.draw_line([x0, x1], [y0, y1], color=color)
  614. # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip
  615. # Note that this strategy is specific to person keypoints.
  616. # For other keypoints, it should just do nothing
  617. try:
  618. ls_x, ls_y = visible["left_shoulder"]
  619. rs_x, rs_y = visible["right_shoulder"]
  620. mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2
  621. except KeyError:
  622. pass
  623. else:
  624. # draw line from nose to mid-shoulder
  625. nose_x, nose_y = visible.get("nose", (None, None))
  626. if nose_x is not None:
  627. self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED)
  628. try:
  629. # draw line from mid-shoulder to mid-hip
  630. lh_x, lh_y = visible["left_hip"]
  631. rh_x, rh_y = visible["right_hip"]
  632. except KeyError:
  633. pass
  634. else:
  635. mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2
  636. self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED)
  637. return self.output
  638. """
  639. Primitive drawing functions:
  640. """
  641. def draw_text(
  642. self,
  643. text,
  644. position,
  645. *,
  646. font_size=None,
  647. color="g",
  648. horizontal_alignment="center",
  649. rotation=0
  650. ):
  651. """
  652. Args:
  653. text (str): class label
  654. position (tuple): a tuple of the x and y coordinates to place text on image.
  655. font_size (int, optional): font of the text. If not provided, a font size
  656. proportional to the image width is calculated and used.
  657. color: color of the text. Refer to `matplotlib.colors` for full list
  658. of formats that are accepted.
  659. horizontal_alignment (str): see `matplotlib.text.Text`
  660. rotation: rotation angle in degrees CCW
  661. Returns:
  662. output (VisImage): image object with text drawn.
  663. """
  664. if not font_size:
  665. font_size = self._default_font_size
  666. # since the text background is dark, we don't want the text to be dark
  667. color = np.maximum(list(mplc.to_rgb(color)), 0.2)
  668. color[np.argmax(color)] = max(0.8, np.max(color))
  669. x, y = position
  670. self.output.ax.text(
  671. x,
  672. y,
  673. text,
  674. size=font_size * self.output.scale,
  675. family="sans-serif",
  676. bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"},
  677. verticalalignment="top",
  678. horizontalalignment=horizontal_alignment,
  679. color=color,
  680. zorder=10,
  681. rotation=rotation,
  682. )
  683. return self.output
  def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"):
      """
      Draw the unfilled outline of an axis-aligned box.

      Args:
          box_coord (tuple): (x0, y0, x1, y1). (x0, y0) is used as the anchor
              corner of the rectangle; its width and height are x1 - x0 and
              y1 - y0.
          alpha (float): opacity of the outline. Smaller values are more transparent.
          edge_color: color of the outline of the box. Refer to `matplotlib.colors`
              for the list of accepted formats.
          line_style (string): line style used for the outline.

      Returns:
          output (VisImage): image object with box drawn.
      """
      x0, y0, x1, y1 = box_coord
      # Outline thickness follows the default font size but never drops below 1
      # (before the output scale is applied).
      stroke = max(self._default_font_size / 4, 1) * self.output.scale
      outline = mpl.patches.Rectangle(
          (x0, y0),
          x1 - x0,
          y1 - y0,
          fill=False,
          edgecolor=edge_color,
          linewidth=stroke,
          alpha=alpha,
          linestyle=line_style,
      )
      self.output.ax.add_patch(outline)
      return self.output
  def draw_rotated_box_with_label(
      self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None
  ):
      """
      Draw the four edges of a rotated box and, optionally, a label at one corner.

      Args:
          rotated_box (tuple): (cnt_x, cnt_y, w, h, angle), where cnt_x and cnt_y
              are the center coordinates of the box, w and h are its width and
              height, and angle is the rotation in degrees (CCW with regard to
              the 0-degree box).
          alpha (float): accepted for signature compatibility; this method does
              not use it.
          edge_color: color of the box edges. Refer to `matplotlib.colors`
              for the list of accepted formats.
          line_style (string): line style for the edges. The second edge
              (index 1) is always drawn dashed ("--") regardless of this value.
          label (string): label for the rotated box. Nothing is rendered when None.

      Returns:
          output (VisImage): image object with box drawn.
      """
      cnt_x, cnt_y, w, h, angle = rotated_box

      # use thinner lines when the box is small
      is_small = w * h < _SMALL_OBJECT_AREA_THRESH * self.output.scale
      linewidth = self._default_font_size / (6 if is_small else 3)

      theta = angle * math.pi / 180.0
      c, s = math.cos(theta), math.sin(theta)

      # Corners of the unrotated box, centered on the origin.
      # x: left->right ; y: top->down
      local_corners = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)]
      corners = [
          (s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in local_corners
      ]

      # Pair every corner with its successor (wrapping around) to get the 4 edges.
      successors = corners[1:] + corners[:1]
      for edge_idx, (start, end) in enumerate(zip(corners, successors)):
          self.draw_line(
              [start[0], end[0]],
              [start[1], end[1]],
              color=edge_color,
              linestyle="--" if edge_idx == 1 else line_style,
              linewidth=linewidth,
          )

      if label is None:
          return self.output

      # Anchor the label at the rotated image of the (-w/2, -h/2) corner.
      anchor = corners[1]
      height_ratio = h / np.sqrt(self.output.height * self.output.width)
      label_color = self._change_color_brightness(edge_color, brightness_factor=0.7)
      # Font grows with the box height relative to the image, within fixed bounds.
      font_size = (
          np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size
      )
      self.draw_text(label, anchor, color=label_color, font_size=font_size, rotation=angle)
      return self.output
  def draw_circle(self, circle_coord, color, radius=3):
      """
      Draw a filled circle.

      Args:
          circle_coord (list(int) or tuple(int)): the x and y coordinates
              of the center of the circle.
          color: color of the circle. Refer to `matplotlib.colors` for the list
              of accepted formats.
          radius (int): radius of the circle.

      Returns:
          output (VisImage): image object with circle drawn.
      """
      # The unpacked values are not used; the unpacking only enforces that
      # exactly two coordinates were supplied.
      _center_x, _center_y = circle_coord
      dot = mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color)
      self.output.ax.add_patch(dot)
      return self.output
  def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None):
      """
      Draw a line through the given points.

      Args:
          x_data (list[int]): x values of all the points being drawn.
              Length should match the length of y_data.
          y_data (list[int]): y values of all the points being drawn.
              Length should match the length of x_data.
          color: color of the line. Refer to `matplotlib.colors` for the list
              of accepted formats.
          linestyle: style of the line. Refer to `matplotlib.lines.Line2D`
              for the list of accepted formats.
          linewidth (float or None): width of the line. When None,
              `self._default_font_size / 3` is used. Either way the width is
              raised to at least 1 and then multiplied by `self.output.scale`.

      Returns:
          output (VisImage): image object with line drawn.
      """
      width = self._default_font_size / 3 if linewidth is None else linewidth
      width = max(width, 1)
      stroke = mpl.lines.Line2D(
          x_data,
          y_data,
          linewidth=width * self.output.scale,
          color=color,
          linestyle=linestyle,
      )
      self.output.ax.add_line(stroke)
      return self.output
  def draw_binary_mask(
      self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=4096
  ):
      """
      Overlay a binary mask on the image, optionally annotated with text.

      Masks without holes are drawn as polygons; masks with holes are drawn as
      an RGBA overlay.

      Args:
          binary_mask (ndarray): numpy array of shape (H, W), where H is the image
              height and W is the image width. It is cast to uint8 before use.
          color: color of the mask. Refer to `matplotlib.colors` for the list of
              accepted formats. If None, a random color is picked.
          edge_color: color of the polygon edges. Refer to `matplotlib.colors`
              for the list of accepted formats.
          text (str): if not None, it is drawn on the largest connected component
              of the mask and on any other component whose area exceeds
              `_LARGE_MASK_AREA_THRESH`, at the median position of the component's
              pixels. Nothing is written if no part of the mask was drawn.
          alpha (float): blending coefficient. Smaller values lead to more
              transparent masks.
          area_threshold (float): polygons with an area smaller than this are not
              drawn (hole-free masks only). None is treated as 4096.

      Returns:
          output (VisImage): image object with mask drawn.
      """
      if color is None:
          color = random_color(rgb=True, maximum=1)
      if area_threshold is None:
          area_threshold = 4096

      binary_mask = binary_mask.astype("uint8")  # opencv needs uint8
      mask = GenericMask(binary_mask, self.output.height, self.output.width)
      rows, cols = binary_mask.shape[0], binary_mask.shape[1]
      drew_something = False

      if mask.has_holes:
          # Polygons cannot represent holes, so paint a per-pixel RGBA layer.
          overlay = np.zeros((rows, cols, 4), dtype="float32")
          overlay[:, :, :3] = color
          overlay[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
          drew_something = True
          self.output.ax.imshow(overlay)
      else:
          # draw polygons for regular masks
          for segment in mask.polygons:
              encoded = mask_util.frPyObjects([segment], rows, cols)
              if mask_util.area(encoded) < area_threshold:
                  continue
              drew_something = True
              vertices = segment.reshape(-1, 2)
              self.draw_polygon(vertices, color=color, edge_color=edge_color, alpha=alpha)

      if text is None or not drew_something:
          return self.output

      # TODO sometimes drawn on wrong objects. the heuristics here can improve.
      lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
      num_cc, cc_labels, stats, _centroids = cv2.connectedComponentsWithStats(binary_mask, 8)
      # Row 0 of `stats` is skipped when looking for the largest component; the
      # last column is used as the component area.
      largest_component_id = np.argmax(stats[1:, -1]) + 1
      # draw text on the largest component, as well as other very large components.
      for cid in range(1, num_cc):
          if cid != largest_component_id and stats[cid, -1] <= _LARGE_MASK_AREA_THRESH:
              continue
          # median is more stable than centroid; nonzero() yields (rows, cols),
          # so reverse to get (x, y).
          center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1]
          self.draw_text(text, center, color=lighter_color)
      return self.output
  def draw_polygon(self, segment, color, edge_color=None, alpha=0.5):
      """
      Draw a filled polygon with an opaque edge.

      Args:
          segment: numpy array of shape Nx2, containing all the points in the polygon.
          color: fill color of the polygon. Refer to `matplotlib.colors` for the
              list of accepted formats.
          edge_color: color of the polygon edges. Refer to `matplotlib.colors` for
              the list of accepted formats. If not provided, a darker shade of
              `color` is used when alpha > 0.8, and `color` itself otherwise.
          alpha (float): blending coefficient of the fill. Smaller values lead to
              more transparent masks.

      Returns:
          output (VisImage): image object with polygon drawn.
      """
      if edge_color is None:
          # A nearly opaque fill would hide a same-colored edge, so darken the
          # edge in that case only.
          edge_color = (
              self._change_color_brightness(color, brightness_factor=-0.7)
              if alpha > 0.8
              else color
          )
      # The edge is always fully opaque; only the face uses `alpha`.
      edge_rgba = mplc.to_rgb(edge_color) + (1,)
      face_rgba = mplc.to_rgb(color) + (alpha,)
      stroke = max(self._default_font_size // 15 * self.output.scale, 1)

      patch = mpl.patches.Polygon(
          segment,
          fill=True,
          facecolor=face_rgba,
          edgecolor=edge_rgba,
          linewidth=stroke,
      )
      self.output.ax.add_patch(patch)
      return self.output
  889. """
  890. Internal methods:
  891. """
  def _jitter(self, color):
      """
      Return a randomly perturbed variant of the given color.

      A random non-negative offset vector of length 0.5 is added to the RGB
      value and the result is clipped to [0, 1].

      Args:
          color: the color to perturb, in any format accepted by
              `matplotlib.colors.to_rgb`.

      Returns:
          jittered_color (tuple[double]): a tuple of 3 elements, containing the
              RGB values of the color after being jittered, each in [0.0, 1.0].
      """
      base_rgb = mplc.to_rgb(color)
      # better to do it in another color space
      offset = np.random.rand(3)
      offset = offset / np.linalg.norm(offset) * 0.5
      return tuple(np.clip(offset + base_rgb, 0, 1))
  def _create_grayscale_image(self, mask=None):
      """
      Build a 3-channel grayscale copy of `self.img` as float32.

      Each pixel's channels are replaced by their mean. If `mask` is given,
      the pixels it selects keep their original values from `self.img`.
      """
      gray = self.img.astype("f4").mean(axis=2)
      img_bw = np.stack((gray, gray, gray), axis=2)
      if mask is None:
          return img_bw
      # Restore the original colors inside the masked region.
      img_bw[mask] = self.img[mask]
      return img_bw
  def _change_color_brightness(self, color, brightness_factor):
      """
      Return a lighter or darker version of a color.

      The color is converted to HLS and its lightness L is replaced by
      L + brightness_factor * L, clamped to [0.0, 1.0]; hue and saturation
      are left untouched.

      Args:
          color: the color to modify. Refer to `matplotlib.colors` for the list
              of accepted formats.
          brightness_factor (float): a value in [-1.0, 1.0] range. 0 means no
              change, a factor in [-1.0, 0) gives a darker color and a factor in
              (0, 1.0] gives a lighter color.

      Returns:
          modified_color (tuple[double]): a tuple containing the RGB values of the
              modified color. Each value in the tuple is in the [0.0, 1.0] range.
      """
      assert -1.0 <= brightness_factor <= 1.0
      hue, lightness, saturation = colorsys.rgb_to_hls(*mplc.to_rgb(color))
      new_lightness = lightness + (brightness_factor * lightness)
      if new_lightness < 0.0:
          new_lightness = 0.0
      elif new_lightness > 1.0:
          new_lightness = 1.0
      return colorsys.hls_to_rgb(hue, new_lightness, saturation)
  def _convert_boxes(self, boxes):
      """
      Convert different formats of boxes to a numpy array.

      Args:
          boxes: a `Boxes` or `RotatedBoxes` object, or anything `np.asarray`
              accepts (e.g. a nested list or an ndarray).

      Returns:
          ndarray: for `Boxes`/`RotatedBoxes`, the contents of their `.tensor`
              (an NxB array, where B = 4 or 5 is the box dimension);
              otherwise `np.asarray(boxes)`.
      """
      if isinstance(boxes, (Boxes, RotatedBoxes)):
          # Tensor.numpy() refuses tensors that require grad or live on a
          # non-CPU device. detach()/cpu() make those cases work and do not
          # copy data for a plain CPU tensor.
          return boxes.tensor.detach().cpu().numpy()
      return np.asarray(boxes)
  def _convert_masks(self, masks_or_polygons):
      """
      Normalize masks given in different formats into `GenericMask` objects.

      `PolygonMasks` are unwrapped to their `.polygons`, `BitMasks` to their
      `.tensor` as a numpy array, and a raw `torch.Tensor` to a numpy array.
      The result is then iterated: items that already are `GenericMask`
      instances are kept, everything else is wrapped in a `GenericMask` sized
      to the output image.

      Returns:
          list[GenericMask]:
      """
      items = masks_or_polygons
      if isinstance(items, PolygonMasks):
          items = items.polygons
      if isinstance(items, BitMasks):
          items = items.tensor.numpy()
      if isinstance(items, torch.Tensor):
          items = items.numpy()
      return [
          item
          if isinstance(item, GenericMask)
          else GenericMask(item, self.output.height, self.output.width)
          for item in items
      ]
  def _convert_keypoints(self, keypoints):
      """
      Convert keypoints to a numpy array.

      A `Keypoints` object is unwrapped to its `.tensor` first; the result
      (or the input itself, for any other type) is passed to `np.asarray`.
      """
      raw = keypoints.tensor if isinstance(keypoints, Keypoints) else keypoints
      return np.asarray(raw)
  def get_output(self):
      """
      Return the object that the drawing methods have been rendering into.

      Returns:
          output (VisImage): the image output containing the visualizations added
              to the image.
      """
      rendered = self.output
      return rendered

No Description