You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

xml_style.py 6.2 kB

2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. import os.path as osp
  3. import xml.etree.ElementTree as ET
  4. import mmcv
  5. import numpy as np
  6. from PIL import Image
  7. from .builder import DATASETS
  8. from .custom import CustomDataset
  9. @DATASETS.register_module()
  10. class XMLDataset(CustomDataset):
  11. """XML dataset for detection.
  12. Args:
  13. min_size (int | float, optional): The minimum size of bounding
  14. boxes in the images. If the size of a bounding box is less than
  15. ``min_size``, it would be add to ignored field.
  16. img_subdir (str): Subdir where images are stored. Default: JPEGImages.
  17. ann_subdir (str): Subdir where annotations are. Default: Annotations.
  18. """
  19. def __init__(self,
  20. min_size=None,
  21. img_subdir='JPEGImages',
  22. ann_subdir='Annotations',
  23. **kwargs):
  24. assert self.CLASSES or kwargs.get(
  25. 'classes', None), 'CLASSES in `XMLDataset` can not be None.'
  26. self.img_subdir = img_subdir
  27. self.ann_subdir = ann_subdir
  28. super(XMLDataset, self).__init__(**kwargs)
  29. self.cat2label = {cat: i for i, cat in enumerate(self.CLASSES)}
  30. self.min_size = min_size
  31. def load_annotations(self, ann_file):
  32. """Load annotation from XML style ann_file.
  33. Args:
  34. ann_file (str): Path of XML file.
  35. Returns:
  36. list[dict]: Annotation info from XML file.
  37. """
  38. data_infos = []
  39. img_ids = mmcv.list_from_file(ann_file)
  40. for img_id in img_ids:
  41. filename = osp.join(self.img_subdir, f'{img_id}.jpg')
  42. xml_path = osp.join(self.img_prefix, self.ann_subdir,
  43. f'{img_id}.xml')
  44. tree = ET.parse(xml_path)
  45. root = tree.getroot()
  46. size = root.find('size')
  47. if size is not None:
  48. width = int(size.find('width').text)
  49. height = int(size.find('height').text)
  50. else:
  51. img_path = osp.join(self.img_prefix, filename)
  52. img = Image.open(img_path)
  53. width, height = img.size
  54. data_infos.append(
  55. dict(id=img_id, filename=filename, width=width, height=height))
  56. return data_infos
  57. def _filter_imgs(self, min_size=32):
  58. """Filter images too small or without annotation."""
  59. valid_inds = []
  60. for i, img_info in enumerate(self.data_infos):
  61. if min(img_info['width'], img_info['height']) < min_size:
  62. continue
  63. if self.filter_empty_gt:
  64. img_id = img_info['id']
  65. xml_path = osp.join(self.img_prefix, self.ann_subdir,
  66. f'{img_id}.xml')
  67. tree = ET.parse(xml_path)
  68. root = tree.getroot()
  69. for obj in root.findall('object'):
  70. name = obj.find('name').text
  71. if name in self.CLASSES:
  72. valid_inds.append(i)
  73. break
  74. else:
  75. valid_inds.append(i)
  76. return valid_inds
  77. def get_ann_info(self, idx):
  78. """Get annotation from XML file by index.
  79. Args:
  80. idx (int): Index of data.
  81. Returns:
  82. dict: Annotation info of specified index.
  83. """
  84. img_id = self.data_infos[idx]['id']
  85. xml_path = osp.join(self.img_prefix, self.ann_subdir, f'{img_id}.xml')
  86. tree = ET.parse(xml_path)
  87. root = tree.getroot()
  88. bboxes = []
  89. labels = []
  90. bboxes_ignore = []
  91. labels_ignore = []
  92. for obj in root.findall('object'):
  93. name = obj.find('name').text
  94. if name not in self.CLASSES:
  95. continue
  96. label = self.cat2label[name]
  97. difficult = obj.find('difficult')
  98. difficult = 0 if difficult is None else int(difficult.text)
  99. bnd_box = obj.find('bndbox')
  100. # TODO: check whether it is necessary to use int
  101. # Coordinates may be float type
  102. bbox = [
  103. int(float(bnd_box.find('xmin').text)),
  104. int(float(bnd_box.find('ymin').text)),
  105. int(float(bnd_box.find('xmax').text)),
  106. int(float(bnd_box.find('ymax').text))
  107. ]
  108. ignore = False
  109. if self.min_size:
  110. assert not self.test_mode
  111. w = bbox[2] - bbox[0]
  112. h = bbox[3] - bbox[1]
  113. if w < self.min_size or h < self.min_size:
  114. ignore = True
  115. if difficult or ignore:
  116. bboxes_ignore.append(bbox)
  117. labels_ignore.append(label)
  118. else:
  119. bboxes.append(bbox)
  120. labels.append(label)
  121. if not bboxes:
  122. bboxes = np.zeros((0, 4))
  123. labels = np.zeros((0, ))
  124. else:
  125. bboxes = np.array(bboxes, ndmin=2) - 1
  126. labels = np.array(labels)
  127. if not bboxes_ignore:
  128. bboxes_ignore = np.zeros((0, 4))
  129. labels_ignore = np.zeros((0, ))
  130. else:
  131. bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
  132. labels_ignore = np.array(labels_ignore)
  133. ann = dict(
  134. bboxes=bboxes.astype(np.float32),
  135. labels=labels.astype(np.int64),
  136. bboxes_ignore=bboxes_ignore.astype(np.float32),
  137. labels_ignore=labels_ignore.astype(np.int64))
  138. return ann
  139. def get_cat_ids(self, idx):
  140. """Get category ids in XML file by index.
  141. Args:
  142. idx (int): Index of data.
  143. Returns:
  144. list[int]: All categories in the image of specified index.
  145. """
  146. cat_ids = []
  147. img_id = self.data_infos[idx]['id']
  148. xml_path = osp.join(self.img_prefix, self.ann_subdir, f'{img_id}.xml')
  149. tree = ET.parse(xml_path)
  150. root = tree.getroot()
  151. for obj in root.findall('object'):
  152. name = obj.find('name').text
  153. if name not in self.CLASSES:
  154. continue
  155. label = self.cat2label[name]
  156. cat_ids.append(label)
  157. return cat_ids

No Description

Contributors (2)