|
- # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
- import io
- import logging
- import contextlib
- import os
- import datetime
- import json
- import numpy as np
- import imagesize
-
- from PIL import Image
-
- from fvcore.common.timer import Timer
- from detectron2.structures import BoxMode, PolygonMasks, Boxes
- from fvcore.common.file_io import PathManager
-
-
- from .. import MetadataCatalog, DatasetCatalog
-
- """
- This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format".
- """
-
-
- logger = logging.getLogger(__name__)
-
- __all__ = ["load_coco_json", "load_sem_seg"]
-
-
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset, and, when the category ids
            in the json are not exactly 1..#categories, also
            "thing_dataset_id_to_contiguous_id".
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Raises:
        AssertionError: if annotation ids are not unique (checked unless "minival" is in
            the json path), if an annotation's "image_id" does not match the image it is
            listed under, or if an annotation carries a non-zero "ignore" field.

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
        2. The x/y entries of "keypoints" are returned shifted by +0.5; every third
           entry is left untouched.
        3. Instances whose polygon segmentation has no valid polygon are dropped.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # Swallow anything the COCO constructor writes to stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    "\nCategory ids in annotations are not in [1, #categories]! "
                    "We'll apply a mapping for you.\n"
                )
        # The mapping is always built when a dataset name is given: ids become 0-based.
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
        assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
            json_file
        )

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    # "segmentation" is handled separately below because invalid polygons are filtered.
    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert (
                anno["image_id"] == image_id
            ), "Annotation {} has image_id {} but is listed under image {}.".format(
                anno.get("id"), anno["image_id"], image_id
            )

            assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                # COCO's segmentation coordinates are floating points in [0, H or W],
                # but keypoint coordinates are integers in [0, H-1 or W-1]
                # Therefore we assume the coordinates are "pixel indices" and
                # add 0.5 to convert to floating point coordinates.
                # A new list is built so the annotation record held by the COCO API
                # object is not modified in place.
                obj["keypoints"] = [
                    v + 0.5 if idx % 3 != 2 else v for idx, v in enumerate(keypts)
                ]

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation
            )
        )
    return dataset_dicts
-
-
def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
    """
    Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
    treated as ground truth annotations and all files under "image_root" with "image_ext" extension
    as input images. Ground truth and input images are matched using file paths relative to
    "gt_root" and "image_root" respectively without taking into account file extensions.
    This works for COCO as well as some other datasets.

    Args:
        gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
            annotations are stored as images with integer values in pixels that represent
            corresponding semantic labels.
        image_root (str): the directory where the input images are.
        gt_ext (str): file extension for ground truth annotations.
        image_ext (str): file extension for input images.

    Returns:
        list[dict]:
            a list of dicts in detectron2 standard format without instance-level
            annotation. Each dict has "file_name", "sem_seg_file_name", "height"
            and "width"; the size is taken from the ground truth file.

    Raises:
        AssertionError: if no file with extension "gt_ext" is found under "gt_root".

    Notes:
        1. This function does not decode the image and ground truth pixels; only the
           ground truth image size is queried.
           The results do not have the "image" and "sem_seg" fields.
    """

    # We match input images with ground truth based on their relative filepaths (without file
    # extensions) starting from 'image_root' and 'gt_root' respectively.
    def file2id(folder_path, file_path):
        # extract relative path starting from `folder_path`
        image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
        # remove file extension
        image_id = os.path.splitext(image_id)[0]
        return image_id

    input_files = sorted(
        (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
        key=lambda file_path: file2id(image_root, file_path),
    )
    gt_files = sorted(
        (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
        key=lambda file_path: file2id(gt_root, file_path),
    )

    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)

    # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
    if len(input_files) != len(gt_files):
        logger.warning(
            "Directory {} and {} has {} and {} files, respectively.".format(
                image_root, gt_root, len(input_files), len(gt_files)
            )
        )
        # Only the extension characters are trimmed, so with the default dot-less
        # extensions each stem keeps its trailing "." (e.g. "0001."); appending the
        # extension again below therefore reconstructs the original file name.
        input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
        gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
        intersect = list(set(input_basenames) & set(gt_basenames))
        # sort, otherwise each worker may obtain a list[dict] in different order
        intersect = sorted(intersect)
        logger.warning("Will use their intersection of {} files.".format(len(intersect)))
        input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]

    logger.info(
        "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
    )

    dataset_dicts = []
    # Both lists are sorted by the same extension-less key, so zip pairs each image
    # with its ground truth file.
    for (img_path, gt_path) in zip(input_files, gt_files):
        local_path = PathManager.get_local_path(gt_path)
        w, h = imagesize.get(local_path)
        record = {}
        record["file_name"] = img_path
        record["sem_seg_file_name"] = gt_path
        record["height"] = h
        record["width"] = w
        dataset_dicts.append(record)

    return dataset_dicts
-
-
def convert_to_coco_dict(dataset_name):
    """
    Convert a dataset in detectron2's standard format into COCO json format

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    The dataset dicts obtained from DatasetCatalog are not modified.

    Args:
        dataset_name:
            name of the source dataset
            must be registered in DatasetCatalog and in detectron2's standard format
    Returns:
        coco_dict: serializable dict in COCO json format, with the keys
            "info", "images", "annotations", "categories" and "licenses"
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    # Category ids are the positions of the names in `thing_classes`.
    categories = [
        {"id": category_id, "name": name}
        for category_id, name in enumerate(MetadataCatalog.get(dataset_name).thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            # Fall back to the enumeration index when the record has no "image_id".
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        # A record without instance annotations still contributes its image entry.
        anns_per_image = image_dict.get("annotations", [])
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances from their polygons
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                # COCO's segmentation coordinates are floating points in [0, H or W],
                # but keypoint coordinates are integers in [0, H-1 or W-1]
                # For COCO format consistency we substract 0.5
                # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                # A new list is built so the source annotation keeps its values and
                # converting the same dicts twice does not shift the points twice.
                keypoints = [
                    v - 0.5 if idx % 3 != 2 else v
                    for idx, v in enumerate(annotation["keypoints"])
                ]
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    # Count the triplets whose third entry is positive.
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["category_id"] = annotation["category_id"]
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
-
-
def convert_to_coco_json(dataset_name, output_folder="", allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's standard format.

    The file is written to "<output_folder>/<dataset_name>_coco_format.json".

    Args:
        dataset_name:
            reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_folder: where json file will be saved and loaded from.
            An empty string (the default) yields a path relative to the current directory.
        allow_cached: if json file is already present then skip conversion
    Returns:
        cache_path: path to the COCO-format json file
    """

    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data
    cache_path = os.path.join(output_folder, f"{dataset_name}_coco_format.json")
    # With the default empty folder there is nothing to create, and asking for an
    # empty path to be created fails, so only create a folder that was actually named.
    if output_folder:
        PathManager.mkdirs(output_folder)
    # Use PathManager for the existence check as well, matching mkdirs/open below.
    if PathManager.exists(cache_path) and allow_cached:
        logger.info(f"Reading cached annotations in COCO format from:{cache_path} ...")
    else:
        logger.info(f"Converting dataset annotations in '{dataset_name}' to COCO format ...")
        coco_dict = convert_to_coco_dict(dataset_name)

        with PathManager.open(cache_path, "w") as json_file:
            logger.info(f"Caching annotations in COCO format: {cache_path}")
            json.dump(coco_dict, json_file)

    return cache_path
-
-
if __name__ == "__main__":
    # Test the COCO json dataset loader.
    #
    # Usage:
    #     python -m detectron2.data.datasets.coco \
    #         path/to/json path/to/image_root dataset_name
    #
    # "dataset_name" can be "coco_2014_minival_100", or other
    # pre-registered ones
    from detectron2.utils.logger import setup_logger
    from detectron2.utils.visualizer import Visualizer
    import detectron2.data.datasets  # noqa # add pre-defined metadata
    import sys

    logger = setup_logger(name=__name__)

    # The third positional argument names the dataset and must already be registered.
    requested_dataset = sys.argv[3]
    assert requested_dataset in DatasetCatalog.list()
    meta = MetadataCatalog.get(requested_dataset)

    dicts = load_coco_json(sys.argv[1], sys.argv[2], requested_dataset)
    logger.info("Done loading {} samples.".format(len(dicts)))

    # Draw every loaded record and save the result under its original base name.
    dirname = "coco-data-vis"
    os.makedirs(dirname, exist_ok=True)
    for sample in dicts:
        source_path = sample["file_name"]
        pixels = np.array(Image.open(source_path))
        drawn = Visualizer(pixels, metadata=meta).draw_dataset_dict(sample)
        drawn.save(os.path.join(dirname, os.path.basename(source_path)))
|