You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
  2. import io
  3. import logging
  4. import contextlib
  5. import os
  6. import datetime
  7. import json
  8. import numpy as np
  9. import imagesize
  10. from PIL import Image
  11. from fvcore.common.timer import Timer
  12. from detectron2.structures import BoxMode, PolygonMasks, Boxes
  13. from fvcore.common.file_io import PathManager
  14. from .. import MetadataCatalog, DatasetCatalog
  15. """
  16. This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format".
  17. """
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Names exported by a star-import of this module.
__all__ = ["load_coco_json", "load_sem_seg"]
  20. def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
  21. """
  22. Load a json file with COCO's instances annotation format.
  23. Currently supports instance detection, instance segmentation,
  24. and person keypoints annotations.
  25. Args:
  26. json_file (str): full path to the json file in COCO instances annotation format.
  27. image_root (str): the directory where the images in this json file exists.
  28. dataset_name (str): the name of the dataset (e.g., coco_2017_train).
  29. If provided, this function will also put "thing_classes" into
  30. the metadata associated with this dataset.
  31. extra_annotation_keys (list[str]): list of per-annotation keys that should also be
  32. loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
  33. "category_id", "segmentation"). The values for these keys will be returned as-is.
  34. For example, the densepose annotations are loaded in this way.
  35. Returns:
  36. list[dict]: a list of dicts in Detectron2 standard format. (See
  37. `Using Custom Datasets </tutorials/datasets.html>`_ )
  38. Notes:
  39. 1. This function does not read the image files.
  40. The results do not have the "image" field.
  41. """
  42. from pycocotools.coco import COCO
  43. timer = Timer()
  44. json_file = PathManager.get_local_path(json_file)
  45. with contextlib.redirect_stdout(io.StringIO()):
  46. coco_api = COCO(json_file)
  47. if timer.seconds() > 1:
  48. logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
  49. id_map = None
  50. if dataset_name is not None:
  51. meta = MetadataCatalog.get(dataset_name)
  52. cat_ids = sorted(coco_api.getCatIds())
  53. cats = coco_api.loadCats(cat_ids)
  54. # The categories in a custom json file may not be sorted.
  55. thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
  56. meta.thing_classes = thing_classes
  57. # In COCO, certain category ids are artificially removed,
  58. # and by convention they are always ignored.
  59. # We deal with COCO's id issue and translate
  60. # the category ids to contiguous ids in [0, 80).
  61. # It works by looking at the "categories" field in the json, therefore
  62. # if users' own json also have incontiguous ids, we'll
  63. # apply this mapping as well but print a warning.
  64. if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
  65. if "coco" not in dataset_name:
  66. logger.warning(
  67. """
  68. Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
  69. """
  70. )
  71. id_map = {v: i for i, v in enumerate(cat_ids)}
  72. meta.thing_dataset_id_to_contiguous_id = id_map
  73. # sort indices for reproducible results
  74. img_ids = sorted(list(coco_api.imgs.keys()))
  75. # imgs is a list of dicts, each looks something like:
  76. # {'license': 4,
  77. # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
  78. # 'file_name': 'COCO_val2014_000000001268.jpg',
  79. # 'height': 427,
  80. # 'width': 640,
  81. # 'date_captured': '2013-11-17 05:57:24',
  82. # 'id': 1268}
  83. imgs = coco_api.loadImgs(img_ids)
  84. # anns is a list[list[dict]], where each dict is an annotation
  85. # record for an object. The inner list enumerates the objects in an image
  86. # and the outer list enumerates over images. Example of anns[0]:
  87. # [{'segmentation': [[192.81,
  88. # 247.09,
  89. # ...
  90. # 219.03,
  91. # 249.06]],
  92. # 'area': 1035.749,
  93. # 'iscrowd': 0,
  94. # 'image_id': 1268,
  95. # 'bbox': [192.81, 224.8, 74.73, 33.43],
  96. # 'category_id': 16,
  97. # 'id': 42986},
  98. # ...]
  99. anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
  100. if "minival" not in json_file:
  101. # The popular valminusminival & minival annotations for COCO2014 contain this bug.
  102. # However the ratio of buggy annotations there is tiny and does not affect accuracy.
  103. # Therefore we explicitly white-list them.
  104. ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
  105. assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
  106. json_file
  107. )
  108. imgs_anns = list(zip(imgs, anns))
  109. logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))
  110. dataset_dicts = []
  111. ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])
  112. num_instances_without_valid_segmentation = 0
  113. for (img_dict, anno_dict_list) in imgs_anns:
  114. record = {}
  115. record["file_name"] = os.path.join(image_root, img_dict["file_name"])
  116. record["height"] = img_dict["height"]
  117. record["width"] = img_dict["width"]
  118. image_id = record["image_id"] = img_dict["id"]
  119. objs = []
  120. for anno in anno_dict_list:
  121. # Check that the image_id in this annotation is the same as
  122. # the image_id we're looking at.
  123. # This fails only when the data parsing logic or the annotation file is buggy.
  124. # The original COCO valminusminival2014 & minival2014 annotation files
  125. # actually contains bugs that, together with certain ways of using COCO API,
  126. # can trigger this assertion.
  127. assert anno["image_id"] == image_id
  128. assert anno.get("ignore", 0) == 0
  129. obj = {key: anno[key] for key in ann_keys if key in anno}
  130. segm = anno.get("segmentation", None)
  131. if segm: # either list[list[float]] or dict(RLE)
  132. if not isinstance(segm, dict):
  133. # filter out invalid polygons (< 3 points)
  134. segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
  135. if len(segm) == 0:
  136. num_instances_without_valid_segmentation += 1
  137. continue # ignore this instance
  138. obj["segmentation"] = segm
  139. keypts = anno.get("keypoints", None)
  140. if keypts: # list[int]
  141. for idx, v in enumerate(keypts):
  142. if idx % 3 != 2:
  143. # COCO's segmentation coordinates are floating points in [0, H or W],
  144. # but keypoint coordinates are integers in [0, H-1 or W-1]
  145. # Therefore we assume the coordinates are "pixel indices" and
  146. # add 0.5 to convert to floating point coordinates.
  147. keypts[idx] = v + 0.5
  148. obj["keypoints"] = keypts
  149. obj["bbox_mode"] = BoxMode.XYWH_ABS
  150. if id_map:
  151. obj["category_id"] = id_map[obj["category_id"]]
  152. objs.append(obj)
  153. record["annotations"] = objs
  154. dataset_dicts.append(record)
  155. if num_instances_without_valid_segmentation > 0:
  156. logger.warn(
  157. "Filtered out {} instances without valid segmentation. "
  158. "There might be issues in your dataset generation process.".format(
  159. num_instances_without_valid_segmentation
  160. )
  161. )
  162. return dataset_dicts
  163. def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
  164. """
  165. Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
  166. treated as ground truth annotations and all files under "image_root" with "image_ext" extension
  167. as input images. Ground truth and input images are matched using file paths relative to
  168. "gt_root" and "image_root" respectively without taking into account file extensions.
  169. This works for COCO as well as some other datasets.
  170. Args:
  171. gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
  172. annotations are stored as images with integer values in pixels that represent
  173. corresponding semantic labels.
  174. image_root (str): the directory where the input images are.
  175. gt_ext (str): file extension for ground truth annotations.
  176. image_ext (str): file extension for input images.
  177. Returns:
  178. list[dict]:
  179. a list of dicts in detectron2 standard format without instance-level
  180. annotation.
  181. Notes:
  182. 1. This function does not read the image and ground truth files.
  183. The results do not have the "image" and "sem_seg" fields.
  184. """
  185. # We match input images with ground truth based on their relative filepaths (without file
  186. # extensions) starting from 'image_root' and 'gt_root' respectively.
  187. def file2id(folder_path, file_path):
  188. # extract relative path starting from `folder_path`
  189. image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
  190. # remove file extension
  191. image_id = os.path.splitext(image_id)[0]
  192. return image_id
  193. input_files = sorted(
  194. (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
  195. key=lambda file_path: file2id(image_root, file_path),
  196. )
  197. gt_files = sorted(
  198. (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
  199. key=lambda file_path: file2id(gt_root, file_path),
  200. )
  201. assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)
  202. # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
  203. if len(input_files) != len(gt_files):
  204. logger.warn(
  205. "Directory {} and {} has {} and {} files, respectively.".format(
  206. image_root, gt_root, len(input_files), len(gt_files)
  207. )
  208. )
  209. input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
  210. gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
  211. intersect = list(set(input_basenames) & set(gt_basenames))
  212. # sort, otherwise each worker may obtain a list[dict] in different order
  213. intersect = sorted(intersect)
  214. logger.warn("Will use their intersection of {} files.".format(len(intersect)))
  215. input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
  216. gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]
  217. logger.info(
  218. "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
  219. )
  220. dataset_dicts = []
  221. for (img_path, gt_path) in zip(input_files, gt_files):
  222. local_path = PathManager.get_local_path(gt_path)
  223. w, h = imagesize.get(local_path)
  224. record = {}
  225. record["file_name"] = img_path
  226. record["sem_seg_file_name"] = gt_path
  227. record["height"] = h
  228. record["width"] = w
  229. dataset_dicts.append(record)
  230. return dataset_dicts
  231. def convert_to_coco_dict(dataset_name):
  232. """
  233. Convert a dataset in detectron2's standard format into COCO json format
  234. Generic dataset description can be found here:
  235. https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset
  236. COCO data format description can be found here:
  237. http://cocodataset.org/#format-data
  238. Args:
  239. dataset_name:
  240. name of the source dataset
  241. must be registered in DatastCatalog and in detectron2's standard format
  242. Returns:
  243. coco_dict: serializable dict in COCO json format
  244. """
  245. dataset_dicts = DatasetCatalog.get(dataset_name)
  246. categories = [
  247. {"id": id, "name": name}
  248. for id, name in enumerate(MetadataCatalog.get(dataset_name).thing_classes)
  249. ]
  250. logger.info("Converting dataset dicts into COCO format")
  251. coco_images = []
  252. coco_annotations = []
  253. for image_id, image_dict in enumerate(dataset_dicts):
  254. coco_image = {
  255. "id": image_dict.get("image_id", image_id),
  256. "width": image_dict["width"],
  257. "height": image_dict["height"],
  258. "file_name": image_dict["file_name"],
  259. }
  260. coco_images.append(coco_image)
  261. anns_per_image = image_dict["annotations"]
  262. for annotation in anns_per_image:
  263. # create a new dict with only COCO fields
  264. coco_annotation = {}
  265. # COCO requirement: XYWH box format
  266. bbox = annotation["bbox"]
  267. bbox_mode = annotation["bbox_mode"]
  268. bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)
  269. # COCO requirement: instance area
  270. if "segmentation" in annotation:
  271. # Computing areas for instances by counting the pixels
  272. segmentation = annotation["segmentation"]
  273. # TODO: check segmentation type: RLE, BinaryMask or Polygon
  274. polygons = PolygonMasks([segmentation])
  275. area = polygons.area()[0].item()
  276. else:
  277. # Computing areas using bounding boxes
  278. bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
  279. area = Boxes([bbox_xy]).area()[0].item()
  280. if "keypoints" in annotation:
  281. keypoints = annotation["keypoints"] # list[int]
  282. for idx, v in enumerate(keypoints):
  283. if idx % 3 != 2:
  284. # COCO's segmentation coordinates are floating points in [0, H or W],
  285. # but keypoint coordinates are integers in [0, H-1 or W-1]
  286. # For COCO format consistency we substract 0.5
  287. # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
  288. keypoints[idx] = v - 0.5
  289. if "num_keypoints" in annotation:
  290. num_keypoints = annotation["num_keypoints"]
  291. else:
  292. num_keypoints = sum(kp > 0 for kp in keypoints[2::3])
  293. # COCO requirement:
  294. # linking annotations to images
  295. # "id" field must start with 1
  296. coco_annotation["id"] = len(coco_annotations) + 1
  297. coco_annotation["image_id"] = coco_image["id"]
  298. coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
  299. coco_annotation["area"] = area
  300. coco_annotation["category_id"] = annotation["category_id"]
  301. coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
  302. # Add optional fields
  303. if "keypoints" in annotation:
  304. coco_annotation["keypoints"] = keypoints
  305. coco_annotation["num_keypoints"] = num_keypoints
  306. if "segmentation" in annotation:
  307. coco_annotation["segmentation"] = annotation["segmentation"]
  308. coco_annotations.append(coco_annotation)
  309. logger.info(
  310. "Conversion finished, "
  311. f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
  312. )
  313. info = {
  314. "date_created": str(datetime.datetime.now()),
  315. "description": "Automatically generated COCO json file for Detectron2.",
  316. }
  317. coco_dict = {
  318. "info": info,
  319. "images": coco_images,
  320. "annotations": coco_annotations,
  321. "categories": categories,
  322. "licenses": None,
  323. }
  324. return coco_dict
  325. def convert_to_coco_json(dataset_name, output_folder="", allow_cached=True):
  326. """
  327. Converts dataset into COCO format and saves it to a json file.
  328. dataset_name must be registered in DatastCatalog and in detectron2's standard format.
  329. Args:
  330. dataset_name:
  331. reference from the config file to the catalogs
  332. must be registered in DatastCatalog and in detectron2's standard format
  333. output_folder: where json file will be saved and loaded from
  334. allow_cached: if json file is already present then skip conversion
  335. Returns:
  336. cache_path: path to the COCO-format json file
  337. """
  338. # TODO: The dataset or the conversion script *may* change,
  339. # a checksum would be useful for validating the cached data
  340. cache_path = os.path.join(output_folder, f"{dataset_name}_coco_format.json")
  341. PathManager.mkdirs(output_folder)
  342. if os.path.exists(cache_path) and allow_cached:
  343. logger.info(f"Reading cached annotations in COCO format from:{cache_path} ...")
  344. else:
  345. logger.info(f"Converting dataset annotations in '{dataset_name}' to COCO format ...)")
  346. coco_dict = convert_to_coco_dict(dataset_name)
  347. with PathManager.open(cache_path, "w") as json_file:
  348. logger.info(f"Caching annotations in COCO format: {cache_path}")
  349. json.dump(coco_dict, json_file)
  350. return cache_path
  351. if __name__ == "__main__":
  352. """
  353. Test the COCO json dataset loader.
  354. Usage:
  355. python -m detectron2.data.datasets.coco \
  356. path/to/json path/to/image_root dataset_name
  357. "dataset_name" can be "coco_2014_minival_100", or other
  358. pre-registered ones
  359. """
  360. from detectron2.utils.logger import setup_logger
  361. from detectron2.utils.visualizer import Visualizer
  362. import detectron2.data.datasets # noqa # add pre-defined metadata
  363. import sys
  364. logger = setup_logger(name=__name__)
  365. assert sys.argv[3] in DatasetCatalog.list()
  366. meta = MetadataCatalog.get(sys.argv[3])
  367. dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3])
  368. logger.info("Done loading {} samples.".format(len(dicts)))
  369. dirname = "coco-data-vis"
  370. os.makedirs(dirname, exist_ok=True)
  371. for d in dicts:
  372. img = np.array(Image.open(d["file_name"]))
  373. visualizer = Visualizer(img, metadata=meta)
  374. vis = visualizer.draw_dataset_dict(d)
  375. fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
  376. vis.save(fpath)

No Description