You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

transforms.py 116 kB

2 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. import copy
  3. import inspect
  4. import math
  5. import warnings
  6. import cv2
  7. import mmcv
  8. import numpy as np
  9. from numpy import random
  10. from mmdet.core import PolygonMasks
  11. from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
  12. from ..builder import PIPELINES
  13. try:
  14. from imagecorruptions import corrupt
  15. except ImportError:
  16. corrupt = None
  17. try:
  18. import albumentations
  19. from albumentations import Compose
  20. except ImportError:
  21. albumentations = None
  22. Compose = None
@PIPELINES.register_module()
class Resize:
    """Resize images & bbox & mask.

    This transform resizes the input image to some scale. Bboxes and masks are
    then resized with the same scale factor. If the input dict contains the key
    "scale", then the scale in the input dict is used, otherwise the specified
    scale in the init method is used. If the input dict contains the key
    "scale_factor" (if MultiScaleFlipAug does not give img_scale but
    scale_factor), the actual scale will be computed by image shape and
    scale_factor.

    `img_scale` can either be a tuple (single-scale) or a list of tuple
    (multi-scale). There are 3 multiscale modes:

    - ``ratio_range is not None``: randomly sample a ratio from the ratio \
      range and multiply it with the image scale.
    - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
      sample a scale from the multiscale range.
    - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
      sample a scale from multiple scales.

    Args:
        img_scale (tuple or list[tuple]): Images scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio)
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image.
        bbox_clip_border (bool, optional): Whether clip the objects outside
            the border of the image. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generates slightly different results. Defaults
            to 'cv2'.
        override (bool, optional): Whether to override `scale` and
            `scale_factor` so as to call resize twice. Default False. If True,
            after the first resizing, the existed `scale` and `scale_factor`
            will be ignored so the second resizing can be allowed.
            This option is a work-around for multiple times of resize in DETR.
            Defaults to False.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=True,
                 bbox_clip_border=True,
                 backend='cv2',
                 override=False):
        if img_scale is None:
            self.img_scale = None
        else:
            # Normalize a single tuple into a one-element list so the rest of
            # the class can always treat ``self.img_scale`` as list[tuple].
            if isinstance(img_scale, list):
                self.img_scale = img_scale
            else:
                self.img_scale = [img_scale]
            assert mmcv.is_list_of(self.img_scale, tuple)

        if ratio_range is not None:
            # mode 1: given a scale and a range of image ratio
            assert len(self.img_scale) == 1
        else:
            # mode 2: given multiple scales or a range of scales
            assert multiscale_mode in ['value', 'range']

        self.backend = backend
        self.multiscale_mode = multiscale_mode
        self.ratio_range = ratio_range
        self.keep_ratio = keep_ratio
        # TODO: refactor the override option in Resize
        self.override = override
        self.bbox_clip_border = bbox_clip_border

    @staticmethod
    def random_select(img_scales):
        """Randomly select an img_scale from given candidates.

        Args:
            img_scales (list[tuple]): Images scales for selection.

        Returns:
            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, \
                where ``img_scale`` is the selected image scale and \
                ``scale_idx`` is the selected index in the given candidates.
        """
        assert mmcv.is_list_of(img_scales, tuple)
        scale_idx = np.random.randint(len(img_scales))
        img_scale = img_scales[scale_idx]
        return img_scale, scale_idx

    @staticmethod
    def random_sample(img_scales):
        """Randomly sample an img_scale when ``multiscale_mode=='range'``.

        Args:
            img_scales (list[tuple]): Images scale range for sampling.
                There must be two tuples in img_scales, which specify the lower
                and upper bound of image scales.

        Returns:
            (tuple, None): Returns a tuple ``(img_scale, None)``, where \
                ``img_scale`` is sampled scale and None is just a placeholder \
                to be consistent with :func:`random_select`.
        """
        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
        # Sample the long and short edges independently within the bounds
        # given by the two scale tuples.
        img_scale_long = [max(s) for s in img_scales]
        img_scale_short = [min(s) for s in img_scales]
        long_edge = np.random.randint(
            min(img_scale_long),
            max(img_scale_long) + 1)
        short_edge = np.random.randint(
            min(img_scale_short),
            max(img_scale_short) + 1)
        img_scale = (long_edge, short_edge)
        return img_scale, None

    @staticmethod
    def random_sample_ratio(img_scale, ratio_range):
        """Randomly sample an img_scale when ``ratio_range`` is specified.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
        generate sampled scale.

        Args:
            img_scale (tuple): Images scale base to multiply with ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to scale
                the ``img_scale``.

        Returns:
            (tuple, None): Returns a tuple ``(scale, None)``, where \
                ``scale`` is sampled ratio multiplied with ``img_scale`` and \
                None is just a placeholder to be consistent with \
                :func:`random_select`.
        """
        assert isinstance(img_scale, tuple) and len(img_scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        # Uniform sample in [min_ratio, max_ratio).
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
        return scale, None

    def _random_scale(self, results):
        """Randomly sample an img_scale according to ``ratio_range`` and
        ``multiscale_mode``.

        If ``ratio_range`` is specified, a ratio will be sampled and be
        multiplied with ``img_scale``.
        If multiple scales are specified by ``img_scale``, a scale will be
        sampled according to ``multiscale_mode``.
        Otherwise, single scale will be used.

        Args:
            results (dict): Result dict from :obj:`dataset`.

        Returns:
            dict: Two new keys ``scale`` and ``scale_idx`` are added into \
                ``results``, which would be used by subsequent pipelines.
        """
        if self.ratio_range is not None:
            scale, scale_idx = self.random_sample_ratio(
                self.img_scale[0], self.ratio_range)
        elif len(self.img_scale) == 1:
            scale, scale_idx = self.img_scale[0], 0
        elif self.multiscale_mode == 'range':
            scale, scale_idx = self.random_sample(self.img_scale)
        elif self.multiscale_mode == 'value':
            scale, scale_idx = self.random_select(self.img_scale)
        else:
            raise NotImplementedError

        results['scale'] = scale
        results['scale_idx'] = scale_idx

    def _resize_img(self, results):
        """Resize images with ``results['scale']``."""
        for key in results.get('img_fields', ['img']):
            if self.keep_ratio:
                img, scale_factor = mmcv.imrescale(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    backend=self.backend)
                # the w_scale and h_scale has minor difference
                # a real fix should be done in the mmcv.imrescale in the future
                new_h, new_w = img.shape[:2]
                h, w = results[key].shape[:2]
                w_scale = new_w / w
                h_scale = new_h / h
            else:
                img, w_scale, h_scale = mmcv.imresize(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    backend=self.backend)
            results[key] = img

            # 4-element factor (x, y, x, y) so it can scale xyxy bboxes
            # by plain elementwise multiplication.
            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
            results['img_shape'] = img.shape
            # in case that there is no padding
            results['pad_shape'] = img.shape
            results['scale_factor'] = scale_factor
            results['keep_ratio'] = self.keep_ratio

    def _resize_bboxes(self, results):
        """Resize bounding boxes with ``results['scale_factor']``."""
        for key in results.get('bbox_fields', []):
            bboxes = results[key] * results['scale_factor']
            if self.bbox_clip_border:
                img_shape = results['img_shape']
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            results[key] = bboxes

    def _resize_masks(self, results):
        """Resize masks with ``results['scale']``"""
        for key in results.get('mask_fields', []):
            if results[key] is None:
                continue
            if self.keep_ratio:
                results[key] = results[key].rescale(results['scale'])
            else:
                results[key] = results[key].resize(results['img_shape'][:2])

    def _resize_seg(self, results):
        """Resize semantic segmentation map with ``results['scale']``."""
        for key in results.get('seg_fields', []):
            # nearest interpolation keeps label values intact
            if self.keep_ratio:
                gt_seg = mmcv.imrescale(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = mmcv.imresize(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results[key] = gt_seg

    def __call__(self, results):
        """Call function to resize images, bounding boxes, masks, semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', \
                'keep_ratio' keys are added into result dict.
        """
        if 'scale' not in results:
            if 'scale_factor' in results:
                # Derive the absolute scale from the image shape and a scalar
                # scale_factor (the MultiScaleFlipAug case described above).
                img_shape = results['img'].shape[:2]
                scale_factor = results['scale_factor']
                assert isinstance(scale_factor, float)
                results['scale'] = tuple(
                    [int(x * scale_factor) for x in img_shape][::-1])
            else:
                self._random_scale(results)
        else:
            if not self.override:
                assert 'scale_factor' not in results, (
                    'scale and scale_factor cannot be both set.')
            else:
                # Discard the results of a previous resize so this transform
                # can run a second time (DETR work-around, see class docs).
                results.pop('scale')
                if 'scale_factor' in results:
                    results.pop('scale_factor')
                self._random_scale(results)

        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(img_scale={self.img_scale}, '
        repr_str += f'multiscale_mode={self.multiscale_mode}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str
@PIPELINES.register_module()
class RandomFlip:
    """Flip the image & bbox & mask.

    If the input dict contains the key "flip", then the flag will be used,
    otherwise it will be randomly decided by a ratio specified in the init
    method.

    When random flip is enabled, ``flip_ratio``/``direction`` can either be a
    float/string or tuple of float/string. There are 3 flip modes:

    - ``flip_ratio`` is float, ``direction`` is string: the image will be
      ``direction``ly flipped with probability of ``flip_ratio`` .
      E.g., ``flip_ratio=0.5``, ``direction='horizontal'``,
      then image will be horizontally flipped with probability of 0.5.
    - ``flip_ratio`` is float, ``direction`` is list of string: the image will
      be ``direction[i]``ly flipped with probability of
      ``flip_ratio/len(direction)``.
      E.g., ``flip_ratio=0.5``, ``direction=['horizontal', 'vertical']``,
      then image will be horizontally flipped with probability of 0.25,
      vertically with probability of 0.25.
    - ``flip_ratio`` is list of float, ``direction`` is list of string:
      given ``len(flip_ratio) == len(direction)``, the image will
      be ``direction[i]``ly flipped with probability of ``flip_ratio[i]``.
      E.g., ``flip_ratio=[0.3, 0.5]``, ``direction=['horizontal',
      'vertical']``, then image will be horizontally flipped with probability
      of 0.3, vertically with probability of 0.5.

    Args:
        flip_ratio (float | list[float], optional): The flipping probability.
            Default: None.
        direction(str | list[str], optional): The flipping direction. Options
            are 'horizontal', 'vertical', 'diagonal'. Default: 'horizontal'.
            If input is a list, the length must equal ``flip_ratio``. Each
            element in ``flip_ratio`` indicates the flip probability of
            corresponding direction.
    """

    def __init__(self, flip_ratio=None, direction='horizontal'):
        if isinstance(flip_ratio, list):
            assert mmcv.is_list_of(flip_ratio, float)
            # the per-direction probabilities must leave room for "no flip"
            assert 0 <= sum(flip_ratio) <= 1
        elif isinstance(flip_ratio, float):
            assert 0 <= flip_ratio <= 1
        elif flip_ratio is None:
            pass
        else:
            raise ValueError('flip_ratios must be None, float, '
                             'or list of float')
        self.flip_ratio = flip_ratio

        valid_directions = ['horizontal', 'vertical', 'diagonal']
        if isinstance(direction, str):
            assert direction in valid_directions
        elif isinstance(direction, list):
            assert mmcv.is_list_of(direction, str)
            assert set(direction).issubset(set(valid_directions))
        else:
            raise ValueError('direction must be either str or list of str')
        self.direction = direction

        if isinstance(flip_ratio, list):
            assert len(self.flip_ratio) == len(self.direction)

    def bbox_flip(self, bboxes, img_shape, direction):
        """Flip bboxes horizontally.

        Args:
            bboxes (numpy.ndarray): Bounding boxes, shape (..., 4*k)
            img_shape (tuple[int]): Image shape (height, width)
            direction (str): Flip direction. Options are 'horizontal',
                'vertical'.

        Returns:
            numpy.ndarray: Flipped bounding boxes.
        """
        assert bboxes.shape[-1] % 4 == 0
        flipped = bboxes.copy()
        if direction == 'horizontal':
            # mirror x1/x2 around the image width (and swap them)
            w = img_shape[1]
            flipped[..., 0::4] = w - bboxes[..., 2::4]
            flipped[..., 2::4] = w - bboxes[..., 0::4]
        elif direction == 'vertical':
            h = img_shape[0]
            flipped[..., 1::4] = h - bboxes[..., 3::4]
            flipped[..., 3::4] = h - bboxes[..., 1::4]
        elif direction == 'diagonal':
            # diagonal = horizontal + vertical flip combined
            w = img_shape[1]
            h = img_shape[0]
            flipped[..., 0::4] = w - bboxes[..., 2::4]
            flipped[..., 1::4] = h - bboxes[..., 3::4]
            flipped[..., 2::4] = w - bboxes[..., 0::4]
            flipped[..., 3::4] = h - bboxes[..., 1::4]
        else:
            raise ValueError(f"Invalid flipping direction '{direction}'")
        return flipped

    def __call__(self, results):
        """Call function to flip bounding boxes, masks, semantic segmentation
        maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Flipped results, 'flip', 'flip_direction' keys are added \
                into result dict.
        """
        if 'flip' not in results:
            if isinstance(self.direction, list):
                # None means non-flip
                direction_list = self.direction + [None]
            else:
                # None means non-flip
                direction_list = [self.direction, None]

            if isinstance(self.flip_ratio, list):
                non_flip_ratio = 1 - sum(self.flip_ratio)
                flip_ratio_list = self.flip_ratio + [non_flip_ratio]
            else:
                # NOTE(review): if ``self.flip_ratio`` is None this
                # subtraction raises TypeError — presumably callers set
                # 'flip' in ``results`` themselves in that configuration;
                # confirm upstream usage.
                non_flip_ratio = 1 - self.flip_ratio
                # exclude non-flip
                single_ratio = self.flip_ratio / (len(direction_list) - 1)
                flip_ratio_list = [single_ratio] * (len(direction_list) -
                                                    1) + [non_flip_ratio]

            cur_dir = np.random.choice(direction_list, p=flip_ratio_list)

            results['flip'] = cur_dir is not None
        if 'flip_direction' not in results:
            results['flip_direction'] = cur_dir
        if results['flip']:
            # flip image
            for key in results.get('img_fields', ['img']):
                results[key] = mmcv.imflip(
                    results[key], direction=results['flip_direction'])
            # flip bboxes
            for key in results.get('bbox_fields', []):
                results[key] = self.bbox_flip(results[key],
                                              results['img_shape'],
                                              results['flip_direction'])
            # flip masks
            for key in results.get('mask_fields', []):
                results[key] = results[key].flip(results['flip_direction'])
            # flip segs
            for key in results.get('seg_fields', []):
                results[key] = mmcv.imflip(
                    results[key], direction=results['flip_direction'])
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(flip_ratio={self.flip_ratio})'
  412. @PIPELINES.register_module()
  413. class RandomShift:
  414. """Shift the image and box given shift pixels and probability.
  415. Args:
  416. shift_ratio (float): Probability of shifts. Default 0.5.
  417. max_shift_px (int): The max pixels for shifting. Default 32.
  418. filter_thr_px (int): The width and height threshold for filtering.
  419. The bbox and the rest of the targets below the width and
  420. height threshold will be filtered. Default 1.
  421. """
  422. def __init__(self, shift_ratio=0.5, max_shift_px=32, filter_thr_px=1):
  423. assert 0 <= shift_ratio <= 1
  424. assert max_shift_px >= 0
  425. self.shift_ratio = shift_ratio
  426. self.max_shift_px = max_shift_px
  427. self.filter_thr_px = int(filter_thr_px)
  428. # The key correspondence from bboxes to labels.
  429. self.bbox2label = {
  430. 'gt_bboxes': 'gt_labels',
  431. 'gt_bboxes_ignore': 'gt_labels_ignore'
  432. }
  433. def __call__(self, results):
  434. """Call function to random shift images, bounding boxes.
  435. Args:
  436. results (dict): Result dict from loading pipeline.
  437. Returns:
  438. dict: Shift results.
  439. """
  440. if random.random() < self.shift_ratio:
  441. img_shape = results['img'].shape[:2]
  442. random_shift_x = random.randint(-self.max_shift_px,
  443. self.max_shift_px)
  444. random_shift_y = random.randint(-self.max_shift_px,
  445. self.max_shift_px)
  446. new_x = max(0, random_shift_x)
  447. orig_x = max(0, -random_shift_x)
  448. new_y = max(0, random_shift_y)
  449. orig_y = max(0, -random_shift_y)
  450. # TODO: support mask and semantic segmentation maps.
  451. for key in results.get('bbox_fields', []):
  452. bboxes = results[key].copy()
  453. bboxes[..., 0::2] += random_shift_x
  454. bboxes[..., 1::2] += random_shift_y
  455. # clip border
  456. bboxes[..., 0::2] = np.clip(bboxes[..., 0::2], 0, img_shape[1])
  457. bboxes[..., 1::2] = np.clip(bboxes[..., 1::2], 0, img_shape[0])
  458. # remove invalid bboxes
  459. bbox_w = bboxes[..., 2] - bboxes[..., 0]
  460. bbox_h = bboxes[..., 3] - bboxes[..., 1]
  461. valid_inds = (bbox_w > self.filter_thr_px) & (
  462. bbox_h > self.filter_thr_px)
  463. # If the shift does not contain any gt-bbox area, skip this
  464. # image.
  465. if key == 'gt_bboxes' and not valid_inds.any():
  466. return results
  467. bboxes = bboxes[valid_inds]
  468. results[key] = bboxes
  469. # label fields. e.g. gt_labels and gt_labels_ignore
  470. label_key = self.bbox2label.get(key)
  471. if label_key in results:
  472. results[label_key] = results[label_key][valid_inds]
  473. for key in results.get('img_fields', ['img']):
  474. img = results[key]
  475. new_img = np.zeros_like(img)
  476. img_h, img_w = img.shape[:2]
  477. new_h = img_h - np.abs(random_shift_y)
  478. new_w = img_w - np.abs(random_shift_x)
  479. new_img[new_y:new_y + new_h, new_x:new_x + new_w] \
  480. = img[orig_y:orig_y + new_h, orig_x:orig_x + new_w]
  481. results[key] = new_img
  482. return results
  483. def __repr__(self):
  484. repr_str = self.__class__.__name__
  485. repr_str += f'(max_shift_px={self.max_shift_px}, '
  486. return repr_str
  487. @PIPELINES.register_module()
  488. class Pad:
  489. """Pad the image & masks & segmentation map.
  490. There are two padding modes: (1) pad to a fixed size and (2) pad to the
  491. minimum size that is divisible by some number.
  492. Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor",
  493. Args:
  494. size (tuple, optional): Fixed padding size.
  495. size_divisor (int, optional): The divisor of padded size.
  496. pad_to_square (bool): Whether to pad the image into a square.
  497. Currently only used for YOLOX. Default: False.
  498. pad_val (dict, optional): A dict for padding value, the default
  499. value is `dict(img=0, masks=0, seg=255)`.
  500. """
  501. def __init__(self,
  502. size=None,
  503. size_divisor=None,
  504. pad_to_square=False,
  505. pad_val=dict(img=0, masks=0, seg=255)):
  506. self.size = size
  507. self.size_divisor = size_divisor
  508. if isinstance(pad_val, float) or isinstance(pad_val, int):
  509. warnings.warn(
  510. 'pad_val of float type is deprecated now, '
  511. f'please use pad_val=dict(img={pad_val}, '
  512. f'masks={pad_val}, seg=255) instead.', DeprecationWarning)
  513. pad_val = dict(img=pad_val, masks=pad_val, seg=255)
  514. assert isinstance(pad_val, dict)
  515. self.pad_val = pad_val
  516. self.pad_to_square = pad_to_square
  517. if pad_to_square:
  518. assert size is None and size_divisor is None, \
  519. 'The size and size_divisor must be None ' \
  520. 'when pad2square is True'
  521. else:
  522. assert size is not None or size_divisor is not None, \
  523. 'only one of size and size_divisor should be valid'
  524. assert size is None or size_divisor is None
  525. def _pad_img(self, results):
  526. """Pad images according to ``self.size``."""
  527. pad_val = self.pad_val.get('img', 0)
  528. for key in results.get('img_fields', ['img']):
  529. if self.pad_to_square:
  530. max_size = max(results[key].shape[:2])
  531. self.size = (max_size, max_size)
  532. if self.size is not None:
  533. padded_img = mmcv.impad(
  534. results[key], shape=self.size, pad_val=pad_val)
  535. elif self.size_divisor is not None:
  536. padded_img = mmcv.impad_to_multiple(
  537. results[key], self.size_divisor, pad_val=pad_val)
  538. results[key] = padded_img
  539. results['pad_shape'] = padded_img.shape
  540. results['pad_fixed_size'] = self.size
  541. results['pad_size_divisor'] = self.size_divisor
  542. def _pad_masks(self, results):
  543. """Pad masks according to ``results['pad_shape']``."""
  544. pad_shape = results['pad_shape'][:2]
  545. pad_val = self.pad_val.get('masks', 0)
  546. for key in results.get('mask_fields', []):
  547. results[key] = results[key].pad(pad_shape, pad_val=pad_val)
  548. def _pad_seg(self, results):
  549. """Pad semantic segmentation map according to
  550. ``results['pad_shape']``."""
  551. pad_val = self.pad_val.get('seg', 255)
  552. for key in results.get('seg_fields', []):
  553. results[key] = mmcv.impad(
  554. results[key], shape=results['pad_shape'][:2], pad_val=pad_val)
  555. def __call__(self, results):
  556. """Call function to pad images, masks, semantic segmentation maps.
  557. Args:
  558. results (dict): Result dict from loading pipeline.
  559. Returns:
  560. dict: Updated result dict.
  561. """
  562. self._pad_img(results)
  563. self._pad_masks(results)
  564. self._pad_seg(results)
  565. return results
  566. def __repr__(self):
  567. repr_str = self.__class__.__name__
  568. repr_str += f'(size={self.size}, '
  569. repr_str += f'size_divisor={self.size_divisor}, '
  570. repr_str += f'pad_to_square={self.pad_to_square}, '
  571. repr_str += f'pad_val={self.pad_val})'
  572. return repr_str
  573. @PIPELINES.register_module()
  574. class Normalize:
  575. """Normalize the image.
  576. Added key is "img_norm_cfg".
  577. Args:
  578. mean (sequence): Mean values of 3 channels.
  579. std (sequence): Std values of 3 channels.
  580. to_rgb (bool): Whether to convert the image from BGR to RGB,
  581. default is true.
  582. """
  583. def __init__(self, mean, std, to_rgb=True):
  584. self.mean = np.array(mean, dtype=np.float32)
  585. self.std = np.array(std, dtype=np.float32)
  586. self.to_rgb = to_rgb
  587. def __call__(self, results):
  588. """Call function to normalize images.
  589. Args:
  590. results (dict): Result dict from loading pipeline.
  591. Returns:
  592. dict: Normalized results, 'img_norm_cfg' key is added into
  593. result dict.
  594. """
  595. for key in results.get('img_fields', ['img']):
  596. results[key] = mmcv.imnormalize(results[key], self.mean, self.std,
  597. self.to_rgb)
  598. results['img_norm_cfg'] = dict(
  599. mean=self.mean, std=self.std, to_rgb=self.to_rgb)
  600. return results
  601. def __repr__(self):
  602. repr_str = self.__class__.__name__
  603. repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})'
  604. return repr_str
@PIPELINES.register_module()
class RandomCrop:
    """Random crop the image & bboxes & masks.

    The absolute `crop_size` is sampled based on `crop_type` and `image_size`,
    then the cropped results are generated.

    Args:
        crop_size (tuple): The relative ratio or absolute pixels of
            height and width.
        crop_type (str, optional): one of "relative_range", "relative",
            "absolute", "absolute_range". "relative" randomly crops
            (h * crop_size[0], w * crop_size[1]) part from an input of size
            (h, w). "relative_range" uniformly samples relative crop size from
            range [crop_size[0], 1] and [crop_size[1], 1] for height and width
            respectively. "absolute" crops from an input with absolute size
            (crop_size[0], crop_size[1]). "absolute_range" uniformly samples
            crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w
            in range [crop_size[0], min(w, crop_size[1])]. Default "absolute".
        allow_negative_crop (bool, optional): Whether to allow a crop that does
            not contain any bbox area. Default False.
        recompute_bbox (bool, optional): Whether to re-compute the boxes based
            on cropped instance masks. Default False.
        bbox_clip_border (bool, optional): Whether clip the objects outside
            the border of the image. Defaults to True.

    Note:
        - If the image is smaller than the absolute crop size, return the
          original image.
        - The keys for bboxes, labels and masks must be aligned. That is,
          `gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and
          `gt_bboxes_ignore` corresponds to `gt_labels_ignore` and
          `gt_masks_ignore`.
        - If the crop does not contain any gt-bbox region and
          `allow_negative_crop` is set to False, skip this image.
    """

    def __init__(self,
                 crop_size,
                 crop_type='absolute',
                 allow_negative_crop=False,
                 recompute_bbox=False,
                 bbox_clip_border=True):
        if crop_type not in [
                'relative_range', 'relative', 'absolute', 'absolute_range'
        ]:
            raise ValueError(f'Invalid crop_type {crop_type}.')
        if crop_type in ['absolute', 'absolute_range']:
            # Absolute modes take positive integer pixel sizes.
            assert crop_size[0] > 0 and crop_size[1] > 0
            assert isinstance(crop_size[0], int) and isinstance(
                crop_size[1], int)
        else:
            # Relative modes take ratios in (0, 1].
            assert 0 < crop_size[0] <= 1 and 0 < crop_size[1] <= 1
        self.crop_size = crop_size
        self.crop_type = crop_type
        self.allow_negative_crop = allow_negative_crop
        self.bbox_clip_border = bbox_clip_border
        self.recompute_bbox = recompute_bbox
        # The key correspondence from bboxes to labels and masks.
        self.bbox2label = {
            'gt_bboxes': 'gt_labels',
            'gt_bboxes_ignore': 'gt_labels_ignore'
        }
        self.bbox2mask = {
            'gt_bboxes': 'gt_masks',
            'gt_bboxes_ignore': 'gt_masks_ignore'
        }

    def _crop_data(self, results, crop_size, allow_negative_crop):
        """Function to randomly crop images, bounding boxes, masks, semantic
        segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.
            crop_size (tuple): Expected absolute size after cropping, (h, w).
            allow_negative_crop (bool): Whether to allow a crop that does not
                contain any bbox area. Default to False.

        Returns:
            dict: Randomly cropped results, 'img_shape' key in result dict is
                updated according to crop size. ``None`` when the crop keeps
                no gt bbox and ``allow_negative_crop`` is False.
        """
        assert crop_size[0] > 0 and crop_size[1] > 0
        # NOTE: the same (offset_h, offset_w) from the last iteration is
        # reused below for bboxes/segs, so all img_fields are assumed to
        # share one shape.
        for key in results.get('img_fields', ['img']):
            img = results[key]
            margin_h = max(img.shape[0] - crop_size[0], 0)
            margin_w = max(img.shape[1] - crop_size[1], 0)
            offset_h = np.random.randint(0, margin_h + 1)
            offset_w = np.random.randint(0, margin_w + 1)
            crop_y1, crop_y2 = offset_h, offset_h + crop_size[0]
            crop_x1, crop_x2 = offset_w, offset_w + crop_size[1]

            # crop the image
            img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
            img_shape = img.shape
            results[key] = img
        results['img_shape'] = img_shape

        # crop bboxes accordingly and clip to the image boundary
        for key in results.get('bbox_fields', []):
            # e.g. gt_bboxes and gt_bboxes_ignore
            bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h],
                                   dtype=np.float32)
            bboxes = results[key] - bbox_offset
            if self.bbox_clip_border:
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            # Keep only boxes with positive width and height after the crop.
            valid_inds = (bboxes[:, 2] > bboxes[:, 0]) & (
                bboxes[:, 3] > bboxes[:, 1])
            # If the crop does not contain any gt-bbox area and
            # allow_negative_crop is False, skip this image.
            if (key == 'gt_bboxes' and not valid_inds.any()
                    and not allow_negative_crop):
                return None
            results[key] = bboxes[valid_inds, :]
            # label fields. e.g. gt_labels and gt_labels_ignore
            label_key = self.bbox2label.get(key)
            if label_key in results:
                results[label_key] = results[label_key][valid_inds]

            # mask fields, e.g. gt_masks and gt_masks_ignore
            mask_key = self.bbox2mask.get(key)
            if mask_key in results:
                results[mask_key] = results[mask_key][
                    valid_inds.nonzero()[0]].crop(
                        np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
                if self.recompute_bbox:
                    # Tighten the boxes around the cropped masks.
                    results[key] = results[mask_key].get_bboxes()

        # crop semantic seg
        for key in results.get('seg_fields', []):
            results[key] = results[key][crop_y1:crop_y2, crop_x1:crop_x2]
        return results

    def _get_crop_size(self, image_size):
        """Randomly generates the absolute crop size based on `crop_type` and
        `image_size`.

        Args:
            image_size (tuple): (h, w).

        Returns:
            crop_size (tuple): (crop_h, crop_w) in absolute pixels.
        """
        # crop_type was validated in __init__, so exactly one branch fires.
        h, w = image_size
        if self.crop_type == 'absolute':
            return (min(self.crop_size[0], h), min(self.crop_size[1], w))
        elif self.crop_type == 'absolute_range':
            assert self.crop_size[0] <= self.crop_size[1]
            crop_h = np.random.randint(
                min(h, self.crop_size[0]),
                min(h, self.crop_size[1]) + 1)
            crop_w = np.random.randint(
                min(w, self.crop_size[0]),
                min(w, self.crop_size[1]) + 1)
            return crop_h, crop_w
        elif self.crop_type == 'relative':
            crop_h, crop_w = self.crop_size
            # Round-half-up conversion from ratio to pixels.
            return int(h * crop_h + 0.5), int(w * crop_w + 0.5)
        elif self.crop_type == 'relative_range':
            # Sample each ratio uniformly from [crop_size[i], 1].
            crop_size = np.asarray(self.crop_size, dtype=np.float32)
            crop_h, crop_w = crop_size + np.random.rand(2) * (1 - crop_size)
            return int(h * crop_h + 0.5), int(w * crop_w + 0.5)

    def __call__(self, results):
        """Call function to randomly crop images, bounding boxes, masks,
        semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Randomly cropped results, 'img_shape' key in result dict is
                updated according to crop size.
        """
        image_size = results['img'].shape[:2]
        crop_size = self._get_crop_size(image_size)
        results = self._crop_data(results, crop_size, self.allow_negative_crop)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(crop_size={self.crop_size}, '
        repr_str += f'crop_type={self.crop_type}, '
        repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str
  774. @PIPELINES.register_module()
  775. class SegRescale:
  776. """Rescale semantic segmentation maps.
  777. Args:
  778. scale_factor (float): The scale factor of the final output.
  779. backend (str): Image rescale backend, choices are 'cv2' and 'pillow'.
  780. These two backends generates slightly different results. Defaults
  781. to 'cv2'.
  782. """
  783. def __init__(self, scale_factor=1, backend='cv2'):
  784. self.scale_factor = scale_factor
  785. self.backend = backend
  786. def __call__(self, results):
  787. """Call function to scale the semantic segmentation map.
  788. Args:
  789. results (dict): Result dict from loading pipeline.
  790. Returns:
  791. dict: Result dict with semantic segmentation map scaled.
  792. """
  793. for key in results.get('seg_fields', []):
  794. if self.scale_factor != 1:
  795. results[key] = mmcv.imrescale(
  796. results[key],
  797. self.scale_factor,
  798. interpolation='nearest',
  799. backend=self.backend)
  800. return results
  801. def __repr__(self):
  802. return self.__class__.__name__ + f'(scale_factor={self.scale_factor})'
@PIPELINES.register_module()
class PhotoMetricDistortion:
    """Apply photometric distortion to image sequentially, every transformation
    is applied with a probability of 0.5. The position of random contrast is in
    second or second to last.

    1. random brightness
    2. random contrast (mode 0)
    3. convert color from BGR to HSV
    4. random saturation
    5. random hue
    6. convert color from HSV to BGR
    7. random contrast (mode 1)
    8. randomly swap channels

    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
    """

    def __init__(self,
                 brightness_delta=32,
                 contrast_range=(0.5, 1.5),
                 saturation_range=(0.5, 1.5),
                 hue_delta=18):
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def __call__(self, results):
        """Call function to perform photometric distortion on images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Result dict with images distorted.
        """
        # NOTE(review): ``random`` here is used like numpy.random
        # (``randint(2)`` drawing 0/1, and ``permutation`` below) — confirm
        # this file imports ``from numpy import random``.
        if 'img_fields' in results:
            assert results['img_fields'] == ['img'], \
                'Only single img_fields is allowed'
        img = results['img']
        # In-place ops below (+=, *=) require a float image.
        assert img.dtype == np.float32, \
            'PhotoMetricDistortion needs the input image of dtype ' \
            'np.float32, please set "to_float32=True" in ' \
            '"LoadImageFromFile" pipeline'
        # random brightness
        if random.randint(2):
            delta = random.uniform(-self.brightness_delta,
                                   self.brightness_delta)
            img += delta

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = random.randint(2)
        if mode == 1:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # convert color from BGR to HSV
        img = mmcv.bgr2hsv(img)

        # random saturation
        if random.randint(2):
            img[..., 1] *= random.uniform(self.saturation_lower,
                                          self.saturation_upper)

        # random hue
        if random.randint(2):
            img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
            # wrap the hue channel back into [0, 360) after the shift
            img[..., 0][img[..., 0] > 360] -= 360
            img[..., 0][img[..., 0] < 0] += 360

        # convert color from HSV to BGR
        img = mmcv.hsv2bgr(img)

        # random contrast
        if mode == 0:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # randomly swap channels
        if random.randint(2):
            img = img[..., random.permutation(3)]

        results['img'] = img
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(\nbrightness_delta={self.brightness_delta},\n'
        repr_str += 'contrast_range='
        repr_str += f'{(self.contrast_lower, self.contrast_upper)},\n'
        repr_str += 'saturation_range='
        repr_str += f'{(self.saturation_lower, self.saturation_upper)},\n'
        repr_str += f'hue_delta={self.hue_delta})'
        return repr_str
  892. @PIPELINES.register_module()
  893. class Expand:
  894. """Random expand the image & bboxes.
  895. Randomly place the original image on a canvas of 'ratio' x original image
  896. size filled with mean values. The ratio is in the range of ratio_range.
  897. Args:
  898. mean (tuple): mean value of dataset.
  899. to_rgb (bool): if need to convert the order of mean to align with RGB.
  900. ratio_range (tuple): range of expand ratio.
  901. prob (float): probability of applying this transformation
  902. """
  903. def __init__(self,
  904. mean=(0, 0, 0),
  905. to_rgb=True,
  906. ratio_range=(1, 4),
  907. seg_ignore_label=None,
  908. prob=0.5):
  909. self.to_rgb = to_rgb
  910. self.ratio_range = ratio_range
  911. if to_rgb:
  912. self.mean = mean[::-1]
  913. else:
  914. self.mean = mean
  915. self.min_ratio, self.max_ratio = ratio_range
  916. self.seg_ignore_label = seg_ignore_label
  917. self.prob = prob
  918. def __call__(self, results):
  919. """Call function to expand images, bounding boxes.
  920. Args:
  921. results (dict): Result dict from loading pipeline.
  922. Returns:
  923. dict: Result dict with images, bounding boxes expanded
  924. """
  925. if random.uniform(0, 1) > self.prob:
  926. return results
  927. if 'img_fields' in results:
  928. assert results['img_fields'] == ['img'], \
  929. 'Only single img_fields is allowed'
  930. img = results['img']
  931. h, w, c = img.shape
  932. ratio = random.uniform(self.min_ratio, self.max_ratio)
  933. # speedup expand when meets large image
  934. if np.all(self.mean == self.mean[0]):
  935. expand_img = np.empty((int(h * ratio), int(w * ratio), c),
  936. img.dtype)
  937. expand_img.fill(self.mean[0])
  938. else:
  939. expand_img = np.full((int(h * ratio), int(w * ratio), c),
  940. self.mean,
  941. dtype=img.dtype)
  942. left = int(random.uniform(0, w * ratio - w))
  943. top = int(random.uniform(0, h * ratio - h))
  944. expand_img[top:top + h, left:left + w] = img
  945. results['img'] = expand_img
  946. # expand bboxes
  947. for key in results.get('bbox_fields', []):
  948. results[key] = results[key] + np.tile(
  949. (left, top), 2).astype(results[key].dtype)
  950. # expand masks
  951. for key in results.get('mask_fields', []):
  952. results[key] = results[key].expand(
  953. int(h * ratio), int(w * ratio), top, left)
  954. # expand segs
  955. for key in results.get('seg_fields', []):
  956. gt_seg = results[key]
  957. expand_gt_seg = np.full((int(h * ratio), int(w * ratio)),
  958. self.seg_ignore_label,
  959. dtype=gt_seg.dtype)
  960. expand_gt_seg[top:top + h, left:left + w] = gt_seg
  961. results[key] = expand_gt_seg
  962. return results
  963. def __repr__(self):
  964. repr_str = self.__class__.__name__
  965. repr_str += f'(mean={self.mean}, to_rgb={self.to_rgb}, '
  966. repr_str += f'ratio_range={self.ratio_range}, '
  967. repr_str += f'seg_ignore_label={self.seg_ignore_label})'
  968. return repr_str
@PIPELINES.register_module()
class MinIoURandomCrop:
    """Random crop the image & bboxes, the cropped patches have minimum IoU
    requirement with original image & bboxes, the IoU threshold is randomly
    selected from min_ious.

    Args:
        min_ious (tuple): minimum IoU threshold for all intersections with
            bounding boxes
        min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w,
            where a >= min_crop_size).
        bbox_clip_border (bool, optional): Whether clip the objects outside
            the border of the image. Defaults to True.

    Note:
        The keys for bboxes, labels and masks should be paired. That is, \
        `gt_bboxes` corresponds to `gt_labels` and `gt_masks`, and \
        `gt_bboxes_ignore` to `gt_labels_ignore` and `gt_masks_ignore`.
    """

    def __init__(self,
                 min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
                 min_crop_size=0.3,
                 bbox_clip_border=True):
        # 1: return ori img
        self.min_ious = min_ious
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size
        self.bbox_clip_border = bbox_clip_border
        # The key correspondence from bboxes to labels and masks.
        self.bbox2label = {
            'gt_bboxes': 'gt_labels',
            'gt_bboxes_ignore': 'gt_labels_ignore'
        }
        self.bbox2mask = {
            'gt_bboxes': 'gt_masks',
            'gt_bboxes_ignore': 'gt_masks_ignore'
        }

    def __call__(self, results):
        """Call function to crop images and bounding boxes with minimum IoU
        constraint.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Result dict with images and bounding boxes cropped, \
                'img_shape' key is updated.
        """
        if 'img_fields' in results:
            assert results['img_fields'] == ['img'], \
                'Only single img_fields is allowed'
        img = results['img']
        assert 'bbox_fields' in results
        boxes = [results[key] for key in results['bbox_fields']]
        boxes = np.concatenate(boxes, 0)
        h, w, c = img.shape
        # Resample a mode until a valid crop is produced; mode 1 means
        # "keep the original image", other modes are min-IoU thresholds.
        while True:
            mode = random.choice(self.sample_mode)
            self.mode = mode
            if mode == 1:
                return results

            min_iou = mode
            # Up to 50 attempts for this mode, then resample the mode.
            for i in range(50):
                new_w = random.uniform(self.min_crop_size * w, w)
                new_h = random.uniform(self.min_crop_size * h, h)

                # h / w in [0.5, 2]
                if new_h / new_w < 0.5 or new_h / new_w > 2:
                    continue

                left = random.uniform(w - new_w)
                top = random.uniform(h - new_h)

                patch = np.array(
                    (int(left), int(top), int(left + new_w), int(top + new_h)))
                # Line or point crop is not allowed
                if patch[2] == patch[0] or patch[3] == patch[1]:
                    continue
                overlaps = bbox_overlaps(
                    patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
                # Reject the patch if any gt box overlaps it below min_iou.
                if len(overlaps) > 0 and overlaps.min() < min_iou:
                    continue

                # center of boxes should inside the crop img
                # only adjust boxes and instance masks when the gt is not empty
                if len(overlaps) > 0:
                    # adjust boxes
                    def is_center_of_bboxes_in_patch(boxes, patch):
                        # True for boxes whose center lies strictly inside
                        # the patch.
                        center = (boxes[:, :2] + boxes[:, 2:]) / 2
                        mask = ((center[:, 0] > patch[0]) *
                                (center[:, 1] > patch[1]) *
                                (center[:, 0] < patch[2]) *
                                (center[:, 1] < patch[3]))
                        return mask

                    mask = is_center_of_bboxes_in_patch(boxes, patch)
                    if not mask.any():
                        continue
                    for key in results.get('bbox_fields', []):
                        boxes = results[key].copy()
                        mask = is_center_of_bboxes_in_patch(boxes, patch)
                        boxes = boxes[mask]
                        if self.bbox_clip_border:
                            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
                            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
                        # Translate into the patch's coordinate frame.
                        boxes -= np.tile(patch[:2], 2)

                        results[key] = boxes
                        # labels
                        label_key = self.bbox2label.get(key)
                        if label_key in results:
                            results[label_key] = results[label_key][mask]

                        # mask fields
                        mask_key = self.bbox2mask.get(key)
                        if mask_key in results:
                            results[mask_key] = results[mask_key][
                                mask.nonzero()[0]].crop(patch)
                # adjust the img no matter whether the gt is empty before crop
                img = img[patch[1]:patch[3], patch[0]:patch[2]]
                results['img'] = img
                results['img_shape'] = img.shape

                # seg fields
                for key in results.get('seg_fields', []):
                    results[key] = results[key][patch[1]:patch[3],
                                                patch[0]:patch[2]]
                return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(min_ious={self.min_ious}, '
        repr_str += f'min_crop_size={self.min_crop_size}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str
  1090. @PIPELINES.register_module()
  1091. class Corrupt:
  1092. """Corruption augmentation.
  1093. Corruption transforms implemented based on
  1094. `imagecorruptions <https://github.com/bethgelab/imagecorruptions>`_.
  1095. Args:
  1096. corruption (str): Corruption name.
  1097. severity (int, optional): The severity of corruption. Default: 1.
  1098. """
  1099. def __init__(self, corruption, severity=1):
  1100. self.corruption = corruption
  1101. self.severity = severity
  1102. def __call__(self, results):
  1103. """Call function to corrupt image.
  1104. Args:
  1105. results (dict): Result dict from loading pipeline.
  1106. Returns:
  1107. dict: Result dict with images corrupted.
  1108. """
  1109. if corrupt is None:
  1110. raise RuntimeError('imagecorruptions is not installed')
  1111. if 'img_fields' in results:
  1112. assert results['img_fields'] == ['img'], \
  1113. 'Only single img_fields is allowed'
  1114. results['img'] = corrupt(
  1115. results['img'].astype(np.uint8),
  1116. corruption_name=self.corruption,
  1117. severity=self.severity)
  1118. return results
  1119. def __repr__(self):
  1120. repr_str = self.__class__.__name__
  1121. repr_str += f'(corruption={self.corruption}, '
  1122. repr_str += f'severity={self.severity})'
  1123. return repr_str
@PIPELINES.register_module()
class Albu:
    """Albumentation augmentation.

    Adds custom transformations from Albumentations library.
    Please, visit `https://albumentations.readthedocs.io`
    to get more information.

    An example of ``transforms`` is as followed:

    .. code-block::

        [
            dict(
                type='ShiftScaleRotate',
                shift_limit=0.0625,
                scale_limit=0.0,
                rotate_limit=0,
                interpolation=1,
                p=0.5),
            dict(
                type='RandomBrightnessContrast',
                brightness_limit=[0.1, 0.3],
                contrast_limit=[0.1, 0.3],
                p=0.2),
            dict(type='ChannelShuffle', p=0.1),
            dict(
                type='OneOf',
                transforms=[
                    dict(type='Blur', blur_limit=3, p=1.0),
                    dict(type='MedianBlur', blur_limit=3, p=1.0)
                ],
                p=0.1),
        ]

    Args:
        transforms (list[dict]): A list of albu transformations
        bbox_params (dict): Bbox_params for albumentation `Compose`
        keymap (dict): Contains {'input key':'albumentation-style key'}
        update_pad_shape (bool): Whether to overwrite 'pad_shape' with the
            augmented image's shape after the pipeline runs.
        skip_img_without_anno (bool): Whether to skip the image if no ann left
            after aug
    """

    def __init__(self,
                 transforms,
                 bbox_params=None,
                 keymap=None,
                 update_pad_shape=False,
                 skip_img_without_anno=False):
        if Compose is None:
            raise RuntimeError('albumentations is not installed')

        # Args will be modified later, copying it will be safer
        transforms = copy.deepcopy(transforms)
        if bbox_params is not None:
            bbox_params = copy.deepcopy(bbox_params)
        if keymap is not None:
            keymap = copy.deepcopy(keymap)
        self.transforms = transforms
        self.filter_lost_elements = False
        self.update_pad_shape = update_pad_shape
        self.skip_img_without_anno = skip_img_without_anno

        # A simple workaround to remove masks without boxes:
        # track box indices through albu via a pseudo 'idx_mapper' label so
        # labels/masks of dropped boxes can be filtered afterwards.
        if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
                and 'filter_lost_elements' in bbox_params):
            self.filter_lost_elements = True
            self.origin_label_fields = bbox_params['label_fields']
            bbox_params['label_fields'] = ['idx_mapper']
            del bbox_params['filter_lost_elements']

        self.bbox_params = (
            self.albu_builder(bbox_params) if bbox_params else None)
        self.aug = Compose([self.albu_builder(t) for t in self.transforms],
                           bbox_params=self.bbox_params)

        if not keymap:
            self.keymap_to_albu = {
                'img': 'image',
                'gt_masks': 'masks',
                'gt_bboxes': 'bboxes'
            }
        else:
            self.keymap_to_albu = keymap
        # Inverse mapping used to restore the original keys afterwards.
        self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()}

    def albu_builder(self, cfg):
        """Import a module from albumentations.

        It inherits some of :func:`build_from_cfg` logic.

        Args:
            cfg (dict): Config dict. It should at least contain the key "type".

        Returns:
            obj: The constructed object.
        """
        assert isinstance(cfg, dict) and 'type' in cfg
        args = cfg.copy()

        obj_type = args.pop('type')
        if mmcv.is_str(obj_type):
            if albumentations is None:
                raise RuntimeError('albumentations is not installed')
            obj_cls = getattr(albumentations, obj_type)
        elif inspect.isclass(obj_type):
            obj_cls = obj_type
        else:
            raise TypeError(
                f'type must be a str or valid type, but got {type(obj_type)}')

        if 'transforms' in args:
            # Recursively build nested transforms (e.g. OneOf).
            args['transforms'] = [
                self.albu_builder(transform)
                for transform in args['transforms']
            ]

        return obj_cls(**args)

    @staticmethod
    def mapper(d, keymap):
        """Dictionary mapper. Renames keys according to keymap provided.

        Args:
            d (dict): old dict
            keymap (dict): {'old_key':'new_key'}

        Returns:
            dict: new dict.
        """
        updated_dict = {}
        for k, v in zip(d.keys(), d.values()):
            new_k = keymap.get(k, k)
            updated_dict[new_k] = d[k]
        return updated_dict

    def __call__(self, results):
        """Run the albumentations pipeline on ``results``.

        Returns the updated dict, or ``None`` when all boxes were lost and
        ``skip_img_without_anno`` is set.
        """
        # dict to albumentations format
        results = self.mapper(results, self.keymap_to_albu)
        # TODO: add bbox_fields
        if 'bboxes' in results:
            # to list of boxes
            if isinstance(results['bboxes'], np.ndarray):
                results['bboxes'] = [x for x in results['bboxes']]
            # add pseudo-field for filtration
            if self.filter_lost_elements:
                results['idx_mapper'] = np.arange(len(results['bboxes']))

        # TODO: Support mask structure in albu
        if 'masks' in results:
            if isinstance(results['masks'], PolygonMasks):
                raise NotImplementedError(
                    'Albu only supports BitMap masks now')
            # Keep the original mask container to rebuild it after aug.
            ori_masks = results['masks']
            # albumentations >= 0.5 expects a list of masks, older versions
            # accept the raw array.
            if albumentations.__version__ < '0.5':
                results['masks'] = results['masks'].masks
            else:
                results['masks'] = [mask for mask in results['masks'].masks]

        results = self.aug(**results)

        if 'bboxes' in results:
            if isinstance(results['bboxes'], list):
                results['bboxes'] = np.array(
                    results['bboxes'], dtype=np.float32)
            results['bboxes'] = results['bboxes'].reshape(-1, 4)

            # filter label_fields
            if self.filter_lost_elements:

                # 'idx_mapper' holds the surviving original box indices.
                for label in self.origin_label_fields:
                    results[label] = np.array(
                        [results[label][i] for i in results['idx_mapper']])
                if 'masks' in results:
                    results['masks'] = np.array(
                        [results['masks'][i] for i in results['idx_mapper']])
                    results['masks'] = ori_masks.__class__(
                        results['masks'], results['image'].shape[0],
                        results['image'].shape[1])

                if (not len(results['idx_mapper'])
                        and self.skip_img_without_anno):
                    return None

        if 'gt_labels' in results:
            if isinstance(results['gt_labels'], list):
                results['gt_labels'] = np.array(results['gt_labels'])
            results['gt_labels'] = results['gt_labels'].astype(np.int64)

        # back to the original format
        results = self.mapper(results, self.keymap_back)

        # update final shape
        if self.update_pad_shape:
            results['pad_shape'] = results['img'].shape

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__ + f'(transforms={self.transforms})'
        return repr_str
@PIPELINES.register_module()
class RandomCenterCropPad:
    """Random center crop and random around padding for CornerNet.

    This operation generates a randomly cropped image from the original image
    and pads it simultaneously. Different from :class:`RandomCrop`, the output
    shape may not equal ``crop_size`` strictly. We choose a random value
    from ``ratios`` and the output shape could be larger or smaller than
    ``crop_size``. The padding operation is also different from :class:`Pad`,
    here we use around padding instead of right-bottom padding.

    The relation between output image (padding image) and original image:

    .. code:: text

                          output image

               +----------------------------+
               |          padded area       |
        +------|----------------------------|----------+
        |      |         cropped area       |          |
        |      |      +---------------+     |          |
        |      |      |    .  center  |     |          | original image
        |      |      |       range   |     |          |
        |      |      +---------------+     |          |
        +------|----------------------------|----------+
               |          padded area       |
               +----------------------------+

    There are 5 main areas in the figure:

    - output image: output image of this operation, also called padding
      image in following instruction.
    - original image: input image of this operation.
    - padded area: non-intersect area of output image and original image.
    - cropped area: the overlap of output image and original image.
    - center range: a smaller area where random center chosen from.
      center range is computed by ``border`` and original image's shape
      to avoid our random center is too close to original image's border.

    Also this operation acts differently in train and test mode, the summary
    pipeline is listed below.

    Train pipeline:

    1. Choose a ``random_ratio`` from ``ratios``, the shape of padding image
       will be ``random_ratio * crop_size``.
    2. Choose a ``random_center`` in center range.
    3. Generate padding image with center matches the ``random_center``.
    4. Initialize the padding image with pixel value equals to ``mean``.
    5. Copy the cropped area to padding image.
    6. Refine annotations.

    Test pipeline:

    1. Compute output shape according to ``test_pad_mode``.
    2. Generate padding image with center matches the original image
       center.
    3. Initialize the padding image with pixel value equals to ``mean``.
    4. Copy the ``cropped area`` to padding image.

    Args:
        crop_size (tuple | None): expected size after crop, final size will
            computed according to ratio. Requires (h, w) in train mode, and
            None in test mode.
        ratios (tuple): random select a ratio from tuple and crop image to
            (crop_size[0] * ratio) * (crop_size[1] * ratio).
            Only available in train mode.
        border (int): max distance from center select area to image border.
            Only available in train mode.
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB.
        test_mode (bool): whether involve random variables in transform.
            In train mode, crop_size is fixed, center coords and ratio is
            random selected from predefined lists. In test mode, crop_size
            is image's original shape, center coords and ratio is fixed.
        test_pad_mode (tuple): padding method and padding shape value, only
            available in test mode. Default is using 'logical_or' with
            127 as padding shape value.

            - 'logical_or': final_shape = input_shape | padding_shape_value
            - 'size_divisor': final_shape = int(
              ceil(input_shape / padding_shape_value) * padding_shape_value)
        test_pad_add_pix (int): Extra padding pixel in test mode. Default 0.
        bbox_clip_border (bool, optional): Whether clip the objects outside
            the border of the image. Defaults to True.
    """

    def __init__(self,
                 crop_size=None,
                 ratios=(0.9, 1.0, 1.1),
                 border=128,
                 mean=None,
                 std=None,
                 to_rgb=None,
                 test_mode=False,
                 test_pad_mode=('logical_or', 127),
                 test_pad_add_pix=0,
                 bbox_clip_border=True):
        # Train and test mode take mutually exclusive argument sets; validate
        # eagerly so misconfiguration fails at pipeline build time.
        if test_mode:
            assert crop_size is None, 'crop_size must be None in test mode'
            assert ratios is None, 'ratios must be None in test mode'
            assert border is None, 'border must be None in test mode'
            assert isinstance(test_pad_mode, (list, tuple))
            assert test_pad_mode[0] in ['logical_or', 'size_divisor']
        else:
            assert isinstance(crop_size, (list, tuple))
            assert crop_size[0] > 0 and crop_size[1] > 0, (
                'crop_size must > 0 in train mode')
            assert isinstance(ratios, (list, tuple))
            assert test_pad_mode is None, (
                'test_pad_mode must be None in train mode')
        self.crop_size = crop_size
        self.ratios = ratios
        self.border = border
        # We do not set default value to mean, std and to_rgb because these
        # hyper-parameters are easy to forget but could affect the performance.
        # Please use the same setting as Normalize for performance assurance.
        assert mean is not None and std is not None and to_rgb is not None
        self.to_rgb = to_rgb
        self.input_mean = mean
        self.input_std = std
        # When converting BGR->RGB the per-channel stats must be reversed so
        # self.mean/self.std stay aligned with the image's channel order.
        if to_rgb:
            self.mean = mean[::-1]
            self.std = std[::-1]
        else:
            self.mean = mean
            self.std = std
        self.test_mode = test_mode
        self.test_pad_mode = test_pad_mode
        self.test_pad_add_pix = test_pad_add_pix
        self.bbox_clip_border = bbox_clip_border

    def _get_border(self, border, size):
        """Get final border for the target size.

        This function generates a ``final_border`` according to image's shape.
        The area between ``final_border`` and ``size - final_border`` is the
        ``center range``. We randomly choose center from the ``center range``
        to avoid our random center is too close to original image's border.
        Also ``center range`` should be larger than 0.

        Args:
            border (int): The initial border, default is 128.
            size (int): The width or height of original image.

        Returns:
            int: The final border.
        """
        # Halve the border (via a power-of-two divisor) until it is smaller
        # than half of ``size`` so that the center range stays non-empty.
        k = 2 * border / size
        i = pow(2, np.ceil(np.log2(np.ceil(k))) + (k == int(k)))
        return border // i

    def _filter_boxes(self, patch, boxes):
        """Check whether the center of each box is in the patch.

        Args:
            patch (list[int]): The cropped area, [left, top, right, bottom].
            boxes (numpy array, (N x 4)): Ground truth boxes.

        Returns:
            mask (numpy array, (N,)): Each box is inside or outside the patch.
        """
        center = (boxes[:, :2] + boxes[:, 2:]) / 2
        # Strict inequalities: a box whose center lies exactly on the patch
        # edge is treated as outside.
        mask = (center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) * (
            center[:, 0] < patch[2]) * (
                center[:, 1] < patch[3])
        return mask

    def _crop_image_and_paste(self, image, center, size):
        """Crop image with a given center and size, then paste the cropped
        image to a blank image with two centers align.

        This function is equivalent to generating a blank image with ``size``
        as its shape. Then cover it on the original image with two centers (
        the center of blank image and the random center of original image)
        aligned. The overlap area is paste from the original image and the
        outside area is filled with ``mean pixel``.

        Args:
            image (np array, H x W x C): Original image.
            center (list[int]): Target crop center coord.
            size (list[int]): Target crop size. [target_h, target_w]

        Returns:
            cropped_img (np array, target_h x target_w x C): Cropped image.
            border (np array, 4): The distance of four border of
                ``cropped_img`` to the original image area, [top, bottom,
                left, right]
            patch (list[int]): The cropped area, [left, top, right, bottom].
        """
        center_y, center_x = center
        target_h, target_w = size
        img_h, img_w, img_c = image.shape

        # Overlap of the crop window with the original image, clamped to the
        # image bounds.
        x0 = max(0, center_x - target_w // 2)
        x1 = min(center_x + target_w // 2, img_w)
        y0 = max(0, center_y - target_h // 2)
        y1 = min(center_y + target_h // 2, img_h)
        patch = np.array((int(x0), int(y0), int(x1), int(y1)))

        left, right = center_x - x0, x1 - center_x
        top, bottom = center_y - y0, y1 - center_y

        # Paste the overlap so that both centers align; the rest of the
        # output is pre-filled with the per-channel mean.
        cropped_center_y, cropped_center_x = target_h // 2, target_w // 2
        cropped_img = np.zeros((target_h, target_w, img_c), dtype=image.dtype)
        for i in range(img_c):
            cropped_img[:, :, i] += self.mean[i]
        y_slice = slice(cropped_center_y - top, cropped_center_y + bottom)
        x_slice = slice(cropped_center_x - left, cropped_center_x + right)
        cropped_img[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]

        border = np.array([
            cropped_center_y - top, cropped_center_y + bottom,
            cropped_center_x - left, cropped_center_x + right
        ],
                          dtype=np.float32)

        return cropped_img, border, patch

    def _train_aug(self, results):
        """Random crop and around padding the original image.

        Args:
            results (dict): Image information in the augment pipeline.

        Returns:
            results (dict): The updated dict.
        """
        img = results['img']
        h, w, c = img.shape
        boxes = results['gt_bboxes']
        while True:
            scale = random.choice(self.ratios)
            new_h = int(self.crop_size[0] * scale)
            new_w = int(self.crop_size[1] * scale)
            h_border = self._get_border(self.border, h)
            w_border = self._get_border(self.border, w)

            # Up to 50 attempts per scale to find a crop that keeps at least
            # one gt box; otherwise retry with a new random scale.
            for i in range(50):
                center_x = random.randint(low=w_border, high=w - w_border)
                center_y = random.randint(low=h_border, high=h - h_border)

                cropped_img, border, patch = self._crop_image_and_paste(
                    img, [center_y, center_x], [new_h, new_w])

                mask = self._filter_boxes(patch, boxes)
                # if image do not have valid bbox, any crop patch is valid.
                if not mask.any() and len(boxes) > 0:
                    continue

                results['img'] = cropped_img
                results['img_shape'] = cropped_img.shape
                results['pad_shape'] = cropped_img.shape

                x0, y0, x1, y1 = patch

                left_w, top_h = center_x - x0, center_y - y0
                cropped_center_x, cropped_center_y = new_w // 2, new_h // 2

                # crop bboxes accordingly and clip to the image boundary
                for key in results.get('bbox_fields', []):
                    mask = self._filter_boxes(patch, results[key])
                    bboxes = results[key][mask]
                    # Shift boxes into the output (padded) coordinate frame.
                    bboxes[:, 0:4:2] += cropped_center_x - left_w - x0
                    bboxes[:, 1:4:2] += cropped_center_y - top_h - y0
                    if self.bbox_clip_border:
                        bboxes[:, 0:4:2] = np.clip(bboxes[:, 0:4:2], 0, new_w)
                        bboxes[:, 1:4:2] = np.clip(bboxes[:, 1:4:2], 0, new_h)
                    keep = (bboxes[:, 2] > bboxes[:, 0]) & (
                        bboxes[:, 3] > bboxes[:, 1])
                    bboxes = bboxes[keep]
                    results[key] = bboxes
                    if key in ['gt_bboxes']:
                        if 'gt_labels' in results:
                            labels = results['gt_labels'][mask]
                            labels = labels[keep]
                            results['gt_labels'] = labels
                        if 'gt_masks' in results:
                            raise NotImplementedError(
                                'RandomCenterCropPad only supports bbox.')

                # crop semantic seg
                for key in results.get('seg_fields', []):
                    raise NotImplementedError(
                        'RandomCenterCropPad only supports bbox.')
                return results

    def _test_aug(self, results):
        """Around padding the original image without cropping.

        The padding mode and value are from ``test_pad_mode``.

        Args:
            results (dict): Image information in the augment pipeline.

        Returns:
            results (dict): The updated dict.
        """
        img = results['img']
        h, w, c = img.shape
        results['img_shape'] = img.shape
        if self.test_pad_mode[0] in ['logical_or']:
            # self.test_pad_add_pix is only used for centernet
            target_h = (h | self.test_pad_mode[1]) + self.test_pad_add_pix
            target_w = (w | self.test_pad_mode[1]) + self.test_pad_add_pix
        elif self.test_pad_mode[0] in ['size_divisor']:
            divisor = self.test_pad_mode[1]
            target_h = int(np.ceil(h / divisor)) * divisor
            target_w = int(np.ceil(w / divisor)) * divisor
        else:
            raise NotImplementedError(
                'RandomCenterCropPad only support two testing pad mode:'
                'logical-or and size_divisor.')

        cropped_img, border, _ = self._crop_image_and_paste(
            img, [h // 2, w // 2], [target_h, target_w])
        results['img'] = cropped_img
        results['pad_shape'] = cropped_img.shape
        results['border'] = border
        return results

    def __call__(self, results):
        """Dispatch to train or test augmentation depending on ``test_mode``."""
        img = results['img']
        assert img.dtype == np.float32, (
            'RandomCenterCropPad needs the input image of dtype np.float32,'
            ' please set "to_float32=True" in "LoadImageFromFile" pipeline')
        h, w, c = img.shape
        assert c == len(self.mean)
        if self.test_mode:
            return self._test_aug(results)
        else:
            return self._train_aug(results)

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(crop_size={self.crop_size}, '
        repr_str += f'ratios={self.ratios}, '
        repr_str += f'border={self.border}, '
        repr_str += f'mean={self.input_mean}, '
        repr_str += f'std={self.input_std}, '
        repr_str += f'to_rgb={self.to_rgb}, '
        repr_str += f'test_mode={self.test_mode}, '
        repr_str += f'test_pad_mode={self.test_pad_mode}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str
  1591. @PIPELINES.register_module()
  1592. class CutOut:
  1593. """CutOut operation.
  1594. Randomly drop some regions of image used in
  1595. `Cutout <https://arxiv.org/abs/1708.04552>`_.
  1596. Args:
  1597. n_holes (int | tuple[int, int]): Number of regions to be dropped.
  1598. If it is given as a list, number of holes will be randomly
  1599. selected from the closed interval [`n_holes[0]`, `n_holes[1]`].
  1600. cutout_shape (tuple[int, int] | list[tuple[int, int]]): The candidate
  1601. shape of dropped regions. It can be `tuple[int, int]` to use a
  1602. fixed cutout shape, or `list[tuple[int, int]]` to randomly choose
  1603. shape from the list.
  1604. cutout_ratio (tuple[float, float] | list[tuple[float, float]]): The
  1605. candidate ratio of dropped regions. It can be `tuple[float, float]`
  1606. to use a fixed ratio or `list[tuple[float, float]]` to randomly
  1607. choose ratio from the list. Please note that `cutout_shape`
  1608. and `cutout_ratio` cannot be both given at the same time.
  1609. fill_in (tuple[float, float, float] | tuple[int, int, int]): The value
  1610. of pixel to fill in the dropped regions. Default: (0, 0, 0).
  1611. """
  1612. def __init__(self,
  1613. n_holes,
  1614. cutout_shape=None,
  1615. cutout_ratio=None,
  1616. fill_in=(0, 0, 0)):
  1617. assert (cutout_shape is None) ^ (cutout_ratio is None), \
  1618. 'Either cutout_shape or cutout_ratio should be specified.'
  1619. assert (isinstance(cutout_shape, (list, tuple))
  1620. or isinstance(cutout_ratio, (list, tuple)))
  1621. if isinstance(n_holes, tuple):
  1622. assert len(n_holes) == 2 and 0 <= n_holes[0] < n_holes[1]
  1623. else:
  1624. n_holes = (n_holes, n_holes)
  1625. self.n_holes = n_holes
  1626. self.fill_in = fill_in
  1627. self.with_ratio = cutout_ratio is not None
  1628. self.candidates = cutout_ratio if self.with_ratio else cutout_shape
  1629. if not isinstance(self.candidates, list):
  1630. self.candidates = [self.candidates]
  1631. def __call__(self, results):
  1632. """Call function to drop some regions of image."""
  1633. h, w, c = results['img'].shape
  1634. n_holes = np.random.randint(self.n_holes[0], self.n_holes[1] + 1)
  1635. for _ in range(n_holes):
  1636. x1 = np.random.randint(0, w)
  1637. y1 = np.random.randint(0, h)
  1638. index = np.random.randint(0, len(self.candidates))
  1639. if not self.with_ratio:
  1640. cutout_w, cutout_h = self.candidates[index]
  1641. else:
  1642. cutout_w = int(self.candidates[index][0] * w)
  1643. cutout_h = int(self.candidates[index][1] * h)
  1644. x2 = np.clip(x1 + cutout_w, 0, w)
  1645. y2 = np.clip(y1 + cutout_h, 0, h)
  1646. results['img'][y1:y2, x1:x2, :] = self.fill_in
  1647. return results
  1648. def __repr__(self):
  1649. repr_str = self.__class__.__name__
  1650. repr_str += f'(n_holes={self.n_holes}, '
  1651. repr_str += (f'cutout_ratio={self.candidates}, ' if self.with_ratio
  1652. else f'cutout_shape={self.candidates}, ')
  1653. repr_str += f'fill_in={self.fill_in})'
  1654. return repr_str
  1655. @PIPELINES.register_module()
  1656. class Mosaic:
  1657. """Mosaic augmentation.
  1658. Given 4 images, mosaic transform combines them into
  1659. one output image. The output image is composed of the parts from each sub-
  1660. image.
  1661. .. code:: text
  1662. mosaic transform
  1663. center_x
  1664. +------------------------------+
  1665. | pad | pad |
  1666. | +-----------+ |
  1667. | | | |
  1668. | | image1 |--------+ |
  1669. | | | | |
  1670. | | | image2 | |
  1671. center_y |----+-------------+-----------|
  1672. | | cropped | |
  1673. |pad | image3 | image4 |
  1674. | | | |
  1675. +----|-------------+-----------+
  1676. | |
  1677. +-------------+
  1678. The mosaic transform steps are as follows:
  1679. 1. Choose the mosaic center as the intersections of 4 images
  1680. 2. Get the left top image according to the index, and randomly
  1681. sample another 3 images from the custom dataset.
  1682. 3. Sub image will be cropped if image is larger than mosaic patch
  1683. Args:
  1684. img_scale (Sequence[int]): Image size after mosaic pipeline of single
  1685. image. Default to (640, 640).
  1686. center_ratio_range (Sequence[float]): Center ratio range of mosaic
  1687. output. Default to (0.5, 1.5).
  1688. min_bbox_size (int | float): The minimum pixel for filtering
  1689. invalid bboxes after the mosaic pipeline. Default to 0.
  1690. pad_val (int): Pad value. Default to 114.
  1691. """
  1692. def __init__(self,
  1693. img_scale=(640, 640),
  1694. center_ratio_range=(0.5, 1.5),
  1695. min_bbox_size=0,
  1696. pad_val=114):
  1697. assert isinstance(img_scale, tuple)
  1698. self.img_scale = img_scale
  1699. self.center_ratio_range = center_ratio_range
  1700. self.min_bbox_size = min_bbox_size
  1701. self.pad_val = pad_val
  1702. def __call__(self, results):
  1703. """Call function to make a mosaic of image.
  1704. Args:
  1705. results (dict): Result dict.
  1706. Returns:
  1707. dict: Result dict with mosaic transformed.
  1708. """
  1709. results = self._mosaic_transform(results)
  1710. return results
  1711. def get_indexes(self, dataset):
  1712. """Call function to collect indexes.
  1713. Args:
  1714. dataset (:obj:`MultiImageMixDataset`): The dataset.
  1715. Returns:
  1716. list: indexes.
  1717. """
  1718. indexes = [random.randint(0, len(dataset)) for _ in range(3)]
  1719. return indexes
  1720. def _mosaic_transform(self, results):
  1721. """Mosaic transform function.
  1722. Args:
  1723. results (dict): Result dict.
  1724. Returns:
  1725. dict: Updated result dict.
  1726. """
  1727. assert 'mix_results' in results
  1728. mosaic_labels = []
  1729. mosaic_bboxes = []
  1730. if len(results['img'].shape) == 3:
  1731. mosaic_img = np.full(
  1732. (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), 3),
  1733. self.pad_val,
  1734. dtype=results['img'].dtype)
  1735. else:
  1736. mosaic_img = np.full(
  1737. (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)),
  1738. self.pad_val,
  1739. dtype=results['img'].dtype)
  1740. # mosaic center x, y
  1741. center_x = int(
  1742. random.uniform(*self.center_ratio_range) * self.img_scale[1])
  1743. center_y = int(
  1744. random.uniform(*self.center_ratio_range) * self.img_scale[0])
  1745. center_position = (center_x, center_y)
  1746. loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
  1747. for i, loc in enumerate(loc_strs):
  1748. if loc == 'top_left':
  1749. results_patch = copy.deepcopy(results)
  1750. else:
  1751. results_patch = copy.deepcopy(results['mix_results'][i - 1])
  1752. img_i = results_patch['img']
  1753. h_i, w_i = img_i.shape[:2]
  1754. # keep_ratio resize
  1755. scale_ratio_i = min(self.img_scale[0] / h_i,
  1756. self.img_scale[1] / w_i)
  1757. img_i = mmcv.imresize(
  1758. img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))
  1759. # compute the combine parameters
  1760. paste_coord, crop_coord = self._mosaic_combine(
  1761. loc, center_position, img_i.shape[:2][::-1])
  1762. x1_p, y1_p, x2_p, y2_p = paste_coord
  1763. x1_c, y1_c, x2_c, y2_c = crop_coord
  1764. # crop and paste image
  1765. mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]
  1766. # adjust coordinate
  1767. gt_bboxes_i = results_patch['gt_bboxes']
  1768. gt_labels_i = results_patch['gt_labels']
  1769. if gt_bboxes_i.shape[0] > 0:
  1770. padw = x1_p - x1_c
  1771. padh = y1_p - y1_c
  1772. gt_bboxes_i[:, 0::2] = \
  1773. scale_ratio_i * gt_bboxes_i[:, 0::2] + padw
  1774. gt_bboxes_i[:, 1::2] = \
  1775. scale_ratio_i * gt_bboxes_i[:, 1::2] + padh
  1776. mosaic_bboxes.append(gt_bboxes_i)
  1777. mosaic_labels.append(gt_labels_i)
  1778. if len(mosaic_labels) > 0:
  1779. mosaic_bboxes = np.concatenate(mosaic_bboxes, 0)
  1780. mosaic_bboxes[:, 0::2] = np.clip(mosaic_bboxes[:, 0::2], 0,
  1781. 2 * self.img_scale[1])
  1782. mosaic_bboxes[:, 1::2] = np.clip(mosaic_bboxes[:, 1::2], 0,
  1783. 2 * self.img_scale[0])
  1784. mosaic_labels = np.concatenate(mosaic_labels, 0)
  1785. mosaic_bboxes, mosaic_labels = \
  1786. self._filter_box_candidates(mosaic_bboxes, mosaic_labels)
  1787. results['img'] = mosaic_img
  1788. results['img_shape'] = mosaic_img.shape
  1789. results['ori_shape'] = mosaic_img.shape
  1790. results['gt_bboxes'] = mosaic_bboxes
  1791. results['gt_labels'] = mosaic_labels
  1792. return results
  1793. def _mosaic_combine(self, loc, center_position_xy, img_shape_wh):
  1794. """Calculate global coordinate of mosaic image and local coordinate of
  1795. cropped sub-image.
  1796. Args:
  1797. loc (str): Index for the sub-image, loc in ('top_left',
  1798. 'top_right', 'bottom_left', 'bottom_right').
  1799. center_position_xy (Sequence[float]): Mixing center for 4 images,
  1800. (x, y).
  1801. img_shape_wh (Sequence[int]): Width and height of sub-image
  1802. Returns:
  1803. tuple[tuple[float]]: Corresponding coordinate of pasting and
  1804. cropping
  1805. - paste_coord (tuple): paste corner coordinate in mosaic image.
  1806. - crop_coord (tuple): crop corner coordinate in mosaic image.
  1807. """
  1808. assert loc in ('top_left', 'top_right', 'bottom_left', 'bottom_right')
  1809. if loc == 'top_left':
  1810. # index0 to top left part of image
  1811. x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
  1812. max(center_position_xy[1] - img_shape_wh[1], 0), \
  1813. center_position_xy[0], \
  1814. center_position_xy[1]
  1815. crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - (
  1816. y2 - y1), img_shape_wh[0], img_shape_wh[1]
  1817. elif loc == 'top_right':
  1818. # index1 to top right part of image
  1819. x1, y1, x2, y2 = center_position_xy[0], \
  1820. max(center_position_xy[1] - img_shape_wh[1], 0), \
  1821. min(center_position_xy[0] + img_shape_wh[0],
  1822. self.img_scale[1] * 2), \
  1823. center_position_xy[1]
  1824. crop_coord = 0, img_shape_wh[1] - (y2 - y1), min(
  1825. img_shape_wh[0], x2 - x1), img_shape_wh[1]
  1826. elif loc == 'bottom_left':
  1827. # index2 to bottom left part of image
  1828. x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
  1829. center_position_xy[1], \
  1830. center_position_xy[0], \
  1831. min(self.img_scale[0] * 2, center_position_xy[1] +
  1832. img_shape_wh[1])
  1833. crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min(
  1834. y2 - y1, img_shape_wh[1])
  1835. else:
  1836. # index3 to bottom right part of image
  1837. x1, y1, x2, y2 = center_position_xy[0], \
  1838. center_position_xy[1], \
  1839. min(center_position_xy[0] + img_shape_wh[0],
  1840. self.img_scale[1] * 2), \
  1841. min(self.img_scale[0] * 2, center_position_xy[1] +
  1842. img_shape_wh[1])
  1843. crop_coord = 0, 0, min(img_shape_wh[0],
  1844. x2 - x1), min(y2 - y1, img_shape_wh[1])
  1845. paste_coord = x1, y1, x2, y2
  1846. return paste_coord, crop_coord
  1847. def _filter_box_candidates(self, bboxes, labels):
  1848. """Filter out bboxes too small after Mosaic."""
  1849. bbox_w = bboxes[:, 2] - bboxes[:, 0]
  1850. bbox_h = bboxes[:, 3] - bboxes[:, 1]
  1851. valid_inds = (bbox_w > self.min_bbox_size) & \
  1852. (bbox_h > self.min_bbox_size)
  1853. valid_inds = np.nonzero(valid_inds)[0]
  1854. return bboxes[valid_inds], labels[valid_inds]
  1855. def __repr__(self):
  1856. repr_str = self.__class__.__name__
  1857. repr_str += f'img_scale={self.img_scale}, '
  1858. repr_str += f'center_ratio_range={self.center_ratio_range})'
  1859. repr_str += f'pad_val={self.pad_val})'
  1860. return repr_str
  1861. @PIPELINES.register_module()
  1862. class MixUp:
  1863. """MixUp data augmentation.
  1864. .. code:: text
  1865. mixup transform
  1866. +------------------------------+
  1867. | mixup image | |
  1868. | +--------|--------+ |
  1869. | | | | |
  1870. |---------------+ | |
  1871. | | | |
  1872. | | image | |
  1873. | | | |
  1874. | | | |
  1875. | |-----------------+ |
  1876. | pad |
  1877. +------------------------------+
  1878. The mixup transform steps are as follows::
  1879. 1. Another random image is picked by dataset and embedded in
  1880. the top left patch(after padding and resizing)
  1881. 2. The target of mixup transform is the weighted average of mixup
  1882. image and origin image.
  1883. Args:
  1884. img_scale (Sequence[int]): Image output size after mixup pipeline.
  1885. Default: (640, 640).
  1886. ratio_range (Sequence[float]): Scale ratio of mixup image.
  1887. Default: (0.5, 1.5).
  1888. flip_ratio (float): Horizontal flip ratio of mixup image.
  1889. Default: 0.5.
  1890. pad_val (int): Pad value. Default: 114.
  1891. max_iters (int): The maximum number of iterations. If the number of
  1892. iterations is greater than `max_iters`, but gt_bbox is still
  1893. empty, then the iteration is terminated. Default: 15.
  1894. min_bbox_size (float): Width and height threshold to filter bboxes.
  1895. If the height or width of a box is smaller than this value, it
  1896. will be removed. Default: 5.
  1897. min_area_ratio (float): Threshold of area ratio between
  1898. original bboxes and wrapped bboxes. If smaller than this value,
  1899. the box will be removed. Default: 0.2.
  1900. max_aspect_ratio (float): Aspect ratio of width and height
  1901. threshold to filter bboxes. If max(h/w, w/h) larger than this
  1902. value, the box will be removed. Default: 20.
  1903. """
  1904. def __init__(self,
  1905. img_scale=(640, 640),
  1906. ratio_range=(0.5, 1.5),
  1907. flip_ratio=0.5,
  1908. pad_val=114,
  1909. max_iters=15,
  1910. min_bbox_size=5,
  1911. min_area_ratio=0.2,
  1912. max_aspect_ratio=20):
  1913. assert isinstance(img_scale, tuple)
  1914. self.dynamic_scale = img_scale
  1915. self.ratio_range = ratio_range
  1916. self.flip_ratio = flip_ratio
  1917. self.pad_val = pad_val
  1918. self.max_iters = max_iters
  1919. self.min_bbox_size = min_bbox_size
  1920. self.min_area_ratio = min_area_ratio
  1921. self.max_aspect_ratio = max_aspect_ratio
  1922. def __call__(self, results):
  1923. """Call function to make a mixup of image.
  1924. Args:
  1925. results (dict): Result dict.
  1926. Returns:
  1927. dict: Result dict with mixup transformed.
  1928. """
  1929. results = self._mixup_transform(results)
  1930. return results
  1931. def get_indexes(self, dataset):
  1932. """Call function to collect indexes.
  1933. Args:
  1934. dataset (:obj:`MultiImageMixDataset`): The dataset.
  1935. Returns:
  1936. list: indexes.
  1937. """
  1938. for i in range(self.max_iters):
  1939. index = random.randint(0, len(dataset))
  1940. gt_bboxes_i = dataset.get_ann_info(index)['bboxes']
  1941. if len(gt_bboxes_i) != 0:
  1942. break
  1943. return index
def _mixup_transform(self, results):
    """MixUp transform function.

    Blends the current image 50/50 with one pre-sampled image from
    ``results['mix_results']`` and merges their annotations.

    NOTE(review): mutates ``retrieve_results['gt_bboxes']`` (and possibly
    ``self.dynamic_scale``) in place — presumably safe because mix_results
    entries are per-sample copies; verify against the dataset wrapper.

    Args:
        results (dict): Result dict.

    Returns:
        dict: Updated result dict.
    """
    assert 'mix_results' in results
    assert len(
        results['mix_results']) == 1, 'MixUp only support 2 images now !'

    if results['mix_results'][0]['gt_bboxes'].shape[0] == 0:
        # empty bbox
        return results

    # Per-sample scale overrides the configured output scale.
    if 'scale' in results:
        self.dynamic_scale = results['scale']

    retrieve_results = results['mix_results'][0]
    retrieve_img = retrieve_results['img']

    jit_factor = random.uniform(*self.ratio_range)
    # (sic) 'filp' == flip; flip happens with probability 1 - flip_ratio.
    is_filp = random.uniform(0, 1) > self.flip_ratio

    # Canvas filled with pad_val; 2-D for grayscale input.
    if len(retrieve_img.shape) == 3:
        out_img = np.ones(
            (self.dynamic_scale[0], self.dynamic_scale[1], 3),
            dtype=retrieve_img.dtype) * self.pad_val
    else:
        out_img = np.ones(
            self.dynamic_scale, dtype=retrieve_img.dtype) * self.pad_val

    # 1. keep_ratio resize
    scale_ratio = min(self.dynamic_scale[0] / retrieve_img.shape[0],
                      self.dynamic_scale[1] / retrieve_img.shape[1])
    retrieve_img = mmcv.imresize(
        retrieve_img, (int(retrieve_img.shape[1] * scale_ratio),
                       int(retrieve_img.shape[0] * scale_ratio)))

    # 2. paste
    out_img[:retrieve_img.shape[0], :retrieve_img.shape[1]] = retrieve_img

    # 3. scale jit
    # scale_ratio accumulates jit_factor so the bbox scaling below matches
    # the total image scaling.
    scale_ratio *= jit_factor
    out_img = mmcv.imresize(out_img, (int(out_img.shape[1] * jit_factor),
                                      int(out_img.shape[0] * jit_factor)))

    # 4. flip
    if is_filp:
        out_img = out_img[:, ::-1, :]

    # 5. random crop: pad to at least the target size, then crop a
    # target-sized window at a random offset.
    ori_img = results['img']
    origin_h, origin_w = out_img.shape[:2]
    target_h, target_w = ori_img.shape[:2]
    padded_img = np.zeros(
        (max(origin_h, target_h), max(origin_w,
                                      target_w), 3)).astype(np.uint8)
    padded_img[:origin_h, :origin_w] = out_img

    x_offset, y_offset = 0, 0
    if padded_img.shape[0] > target_h:
        y_offset = random.randint(0, padded_img.shape[0] - target_h)
    if padded_img.shape[1] > target_w:
        x_offset = random.randint(0, padded_img.shape[1] - target_w)
    padded_cropped_img = padded_img[y_offset:y_offset + target_h,
                                    x_offset:x_offset + target_w]

    # 6. adjust bbox: scale to the jittered image frame and mirror if the
    # image was flipped. Note: in-place on the mix_results array.
    retrieve_gt_bboxes = retrieve_results['gt_bboxes']
    retrieve_gt_bboxes[:, 0::2] = np.clip(
        retrieve_gt_bboxes[:, 0::2] * scale_ratio, 0, origin_w)
    retrieve_gt_bboxes[:, 1::2] = np.clip(
        retrieve_gt_bboxes[:, 1::2] * scale_ratio, 0, origin_h)

    if is_filp:
        retrieve_gt_bboxes[:, 0::2] = (
            origin_w - retrieve_gt_bboxes[:, 0::2][:, ::-1])

    # 7. filter: translate boxes into the crop frame and drop ones that
    # became too small / distorted (compared against pre-crop boxes).
    cp_retrieve_gt_bboxes = retrieve_gt_bboxes.copy()
    cp_retrieve_gt_bboxes[:, 0::2] = np.clip(
        cp_retrieve_gt_bboxes[:, 0::2] - x_offset, 0, target_w)
    cp_retrieve_gt_bboxes[:, 1::2] = np.clip(
        cp_retrieve_gt_bboxes[:, 1::2] - y_offset, 0, target_h)
    keep_list = self._filter_box_candidates(retrieve_gt_bboxes.T,
                                            cp_retrieve_gt_bboxes.T)

    # 8. mix up: only commit the blend when at least one box survives.
    if keep_list.sum() >= 1.0:
        ori_img = ori_img.astype(np.float32)
        mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img.astype(
            np.float32)

        retrieve_gt_labels = retrieve_results['gt_labels'][keep_list]
        retrieve_gt_bboxes = cp_retrieve_gt_bboxes[keep_list]
        mixup_gt_bboxes = np.concatenate(
            (results['gt_bboxes'], retrieve_gt_bboxes), axis=0)
        mixup_gt_labels = np.concatenate(
            (results['gt_labels'], retrieve_gt_labels), axis=0)

        results['img'] = mixup_img
        results['img_shape'] = mixup_img.shape
        results['gt_bboxes'] = mixup_gt_bboxes
        results['gt_labels'] = mixup_gt_labels

    return results
  2033. def _filter_box_candidates(self, bbox1, bbox2):
  2034. """Compute candidate boxes which include following 5 things:
  2035. bbox1 before augment, bbox2 after augment, min_bbox_size (pixels),
  2036. min_area_ratio, max_aspect_ratio.
  2037. """
  2038. w1, h1 = bbox1[2] - bbox1[0], bbox1[3] - bbox1[1]
  2039. w2, h2 = bbox2[2] - bbox2[0], bbox2[3] - bbox2[1]
  2040. ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16))
  2041. return ((w2 > self.min_bbox_size)
  2042. & (h2 > self.min_bbox_size)
  2043. & (w2 * h2 / (w1 * h1 + 1e-16) > self.min_area_ratio)
  2044. & (ar < self.max_aspect_ratio))
  2045. def __repr__(self):
  2046. repr_str = self.__class__.__name__
  2047. repr_str += f'dynamic_scale={self.dynamic_scale}, '
  2048. repr_str += f'ratio_range={self.ratio_range})'
  2049. repr_str += f'flip_ratio={self.flip_ratio})'
  2050. repr_str += f'pad_val={self.pad_val})'
  2051. repr_str += f'max_iters={self.max_iters})'
  2052. repr_str += f'min_bbox_size={self.min_bbox_size})'
  2053. repr_str += f'min_area_ratio={self.min_area_ratio})'
  2054. repr_str += f'max_aspect_ratio={self.max_aspect_ratio})'
  2055. return repr_str
  2056. @PIPELINES.register_module()
  2057. class RandomAffine:
  2058. """Random affine transform data augmentation.
  2059. This operation randomly generates affine transform matrix which including
  2060. rotation, translation, shear and scaling transforms.
  2061. Args:
  2062. max_rotate_degree (float): Maximum degrees of rotation transform.
  2063. Default: 10.
  2064. max_translate_ratio (float): Maximum ratio of translation.
  2065. Default: 0.1.
  2066. scaling_ratio_range (tuple[float]): Min and max ratio of
  2067. scaling transform. Default: (0.5, 1.5).
  2068. max_shear_degree (float): Maximum degrees of shear
  2069. transform. Default: 2.
  2070. border (tuple[int]): Distance from height and width sides of input
  2071. image to adjust output shape. Only used in mosaic dataset.
  2072. Default: (0, 0).
  2073. border_val (tuple[int]): Border padding values of 3 channels.
  2074. Default: (114, 114, 114).
  2075. min_bbox_size (float): Width and height threshold to filter bboxes.
  2076. If the height or width of a box is smaller than this value, it
  2077. will be removed. Default: 2.
  2078. min_area_ratio (float): Threshold of area ratio between
  2079. original bboxes and wrapped bboxes. If smaller than this value,
  2080. the box will be removed. Default: 0.2.
  2081. max_aspect_ratio (float): Aspect ratio of width and height
  2082. threshold to filter bboxes. If max(h/w, w/h) larger than this
  2083. value, the box will be removed.
  2084. """
  2085. def __init__(self,
  2086. max_rotate_degree=10.0,
  2087. max_translate_ratio=0.1,
  2088. scaling_ratio_range=(0.5, 1.5),
  2089. max_shear_degree=2.0,
  2090. border=(0, 0),
  2091. border_val=(114, 114, 114),
  2092. min_bbox_size=2,
  2093. min_area_ratio=0.2,
  2094. max_aspect_ratio=20):
  2095. assert 0 <= max_translate_ratio <= 1
  2096. assert scaling_ratio_range[0] <= scaling_ratio_range[1]
  2097. assert scaling_ratio_range[0] > 0
  2098. self.max_rotate_degree = max_rotate_degree
  2099. self.max_translate_ratio = max_translate_ratio
  2100. self.scaling_ratio_range = scaling_ratio_range
  2101. self.max_shear_degree = max_shear_degree
  2102. self.border = border
  2103. self.border_val = border_val
  2104. self.min_bbox_size = min_bbox_size
  2105. self.min_area_ratio = min_area_ratio
  2106. self.max_aspect_ratio = max_aspect_ratio
  2107. def __call__(self, results):
  2108. img = results['img']
  2109. height = img.shape[0] + self.border[0] * 2
  2110. width = img.shape[1] + self.border[1] * 2
  2111. # Center
  2112. center_matrix = np.eye(3, dtype=np.float32)
  2113. center_matrix[0, 2] = -img.shape[1] / 2 # x translation (pixels)
  2114. center_matrix[1, 2] = -img.shape[0] / 2 # y translation (pixels)
  2115. # Rotation
  2116. rotation_degree = random.uniform(-self.max_rotate_degree,
  2117. self.max_rotate_degree)
  2118. rotation_matrix = self._get_rotation_matrix(rotation_degree)
  2119. # Scaling
  2120. scaling_ratio = random.uniform(self.scaling_ratio_range[0],
  2121. self.scaling_ratio_range[1])
  2122. scaling_matrix = self._get_scaling_matrix(scaling_ratio)
  2123. # Shear
  2124. x_degree = random.uniform(-self.max_shear_degree,
  2125. self.max_shear_degree)
  2126. y_degree = random.uniform(-self.max_shear_degree,
  2127. self.max_shear_degree)
  2128. shear_matrix = self._get_shear_matrix(x_degree, y_degree)
  2129. # Translation
  2130. trans_x = random.uniform(0.5 - self.max_translate_ratio,
  2131. 0.5 + self.max_translate_ratio) * width
  2132. trans_y = random.uniform(0.5 - self.max_translate_ratio,
  2133. 0.5 + self.max_translate_ratio) * height
  2134. translate_matrix = self._get_translation_matrix(trans_x, trans_y)
  2135. warp_matrix = (
  2136. translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix
  2137. @ center_matrix)
  2138. img = cv2.warpPerspective(
  2139. img,
  2140. warp_matrix,
  2141. dsize=(width, height),
  2142. borderValue=self.border_val)
  2143. results['img'] = img
  2144. results['img_shape'] = img.shape
  2145. for key in results.get('bbox_fields', []):
  2146. bboxes = results[key]
  2147. num_bboxes = len(bboxes)
  2148. if num_bboxes:
  2149. # homogeneous coordinates
  2150. xs = bboxes[:, [0, 0, 2, 2]].reshape(num_bboxes * 4)
  2151. ys = bboxes[:, [1, 3, 3, 1]].reshape(num_bboxes * 4)
  2152. ones = np.ones_like(xs)
  2153. points = np.vstack([xs, ys, ones])
  2154. warp_points = warp_matrix @ points
  2155. warp_points = warp_points[:2] / warp_points[2]
  2156. xs = warp_points[0].reshape(num_bboxes, 4)
  2157. ys = warp_points[1].reshape(num_bboxes, 4)
  2158. warp_bboxes = np.vstack(
  2159. (xs.min(1), ys.min(1), xs.max(1), ys.max(1))).T
  2160. warp_bboxes[:, [0, 2]] = warp_bboxes[:, [0, 2]].clip(0, width)
  2161. warp_bboxes[:, [1, 3]] = warp_bboxes[:, [1, 3]].clip(0, height)
  2162. # filter bboxes
  2163. valid_index = self.filter_gt_bboxes(bboxes * scaling_ratio,
  2164. warp_bboxes)
  2165. results[key] = warp_bboxes[valid_index]
  2166. if key in ['gt_bboxes']:
  2167. if 'gt_labels' in results:
  2168. results['gt_labels'] = results['gt_labels'][
  2169. valid_index]
  2170. if 'gt_masks' in results:
  2171. raise NotImplementedError(
  2172. 'RandomAffine only supports bbox.')
  2173. return results
  2174. def filter_gt_bboxes(self, origin_bboxes, wrapped_bboxes):
  2175. origin_w = origin_bboxes[:, 2] - origin_bboxes[:, 0]
  2176. origin_h = origin_bboxes[:, 3] - origin_bboxes[:, 1]
  2177. wrapped_w = wrapped_bboxes[:, 2] - wrapped_bboxes[:, 0]
  2178. wrapped_h = wrapped_bboxes[:, 3] - wrapped_bboxes[:, 1]
  2179. aspect_ratio = np.maximum(wrapped_w / (wrapped_h + 1e-16),
  2180. wrapped_h / (wrapped_w + 1e-16))
  2181. wh_valid_idx = (wrapped_w > self.min_bbox_size) & \
  2182. (wrapped_h > self.min_bbox_size)
  2183. area_valid_idx = wrapped_w * wrapped_h / (origin_w * origin_h +
  2184. 1e-16) > self.min_area_ratio
  2185. aspect_ratio_valid_idx = aspect_ratio < self.max_aspect_ratio
  2186. return wh_valid_idx & area_valid_idx & aspect_ratio_valid_idx
  2187. def __repr__(self):
  2188. repr_str = self.__class__.__name__
  2189. repr_str += f'(max_rotate_degree={self.max_rotate_degree}, '
  2190. repr_str += f'max_translate_ratio={self.max_translate_ratio}, '
  2191. repr_str += f'scaling_ratio={self.scaling_ratio_range}, '
  2192. repr_str += f'max_shear_degree={self.max_shear_degree}, '
  2193. repr_str += f'border={self.border}, '
  2194. repr_str += f'border_val={self.border_val}, '
  2195. repr_str += f'min_bbox_size={self.min_bbox_size}, '
  2196. repr_str += f'min_area_ratio={self.min_area_ratio}, '
  2197. repr_str += f'max_aspect_ratio={self.max_aspect_ratio})'
  2198. return repr_str
  2199. @staticmethod
  2200. def _get_rotation_matrix(rotate_degrees):
  2201. radian = math.radians(rotate_degrees)
  2202. rotation_matrix = np.array(
  2203. [[np.cos(radian), -np.sin(radian), 0.],
  2204. [np.sin(radian), np.cos(radian), 0.], [0., 0., 1.]],
  2205. dtype=np.float32)
  2206. return rotation_matrix
  2207. @staticmethod
  2208. def _get_scaling_matrix(scale_ratio):
  2209. scaling_matrix = np.array(
  2210. [[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]],
  2211. dtype=np.float32)
  2212. return scaling_matrix
  2213. @staticmethod
  2214. def _get_share_matrix(scale_ratio):
  2215. scaling_matrix = np.array(
  2216. [[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]],
  2217. dtype=np.float32)
  2218. return scaling_matrix
  2219. @staticmethod
  2220. def _get_shear_matrix(x_shear_degrees, y_shear_degrees):
  2221. x_radian = math.radians(x_shear_degrees)
  2222. y_radian = math.radians(y_shear_degrees)
  2223. shear_matrix = np.array([[1, np.tan(x_radian), 0.],
  2224. [np.tan(y_radian), 1, 0.], [0., 0., 1.]],
  2225. dtype=np.float32)
  2226. return shear_matrix
  2227. @staticmethod
  2228. def _get_translation_matrix(x, y):
  2229. translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]],
  2230. dtype=np.float32)
  2231. return translation_matrix
  2232. @PIPELINES.register_module()
  2233. class SimpleCopyPaste:
  2234. def __init__(self,
  2235. scale=(0.1, 2),
  2236. max_paste_objects=5,
  2237. prob=0.5,
  2238. flip_prob=0.5,
  2239. occluded_area_thresh=300,
  2240. box_occlusion_thresh=10):
  2241. self.max_paste_objects = max_paste_objects
  2242. self.prob = prob
  2243. self.flip_prob = flip_prob
  2244. self.resize_scale = scale
  2245. self.occluded_area_thresh = occluded_area_thresh
  2246. self.box_occlusion_thresh = box_occlusion_thresh
  2247. def get_indexes(self, dataset):
  2248. """Call function to collect indexes.
  2249. Args:
  2250. dataset (:obj:`MultiImageMixDataset`): The dataset.
  2251. Returns:
  2252. list: indexes.
  2253. """
  2254. return random.randint(0, len(dataset))
  2255. def random_bbox_indexes(self, arr):
  2256. return np.random.randint(
  2257. 0, arr.shape[0], size=min(arr.shape[0], self.max_paste_objects))
  2258. def rescale_boxes(self, bboxes, rescale_ratio, img_shape=None, clip=False):
  2259. if isinstance(rescale_ratio, float):
  2260. bboxes = bboxes * rescale_ratio
  2261. if isinstance(rescale_ratio, tuple):
  2262. bboxes[:, 0::2] = bboxes[:, 0::2] * rescale_ratio[1]
  2263. bboxes[:, 1::2] = bboxes[:, 1::2] * rescale_ratio[0]
  2264. if clip:
  2265. bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
  2266. bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
  2267. return bboxes
  2268. def get_rescale_ratio(self, n_values):
  2269. ratio = []
  2270. for x in range(n_values):
  2271. ratio.append(
  2272. np.random.uniform(self.resize_scale[0], self.resize_scale[1]))
  2273. return ratio
  2274. def get_updated_masks(self, parent_mask, child_mask):
  2275. assert parent_mask.shape == child_mask.shape, \
  2276. 'Cannot compare two arrays of different size'
  2277. return np.where(parent_mask, 0, child_mask)
  2278. def get_box_from_mask(self, mask):
  2279. """Convert mask Y to a bounding box, assumes 0 as background nonzero
  2280. object."""
  2281. Y_vals, X_vals = np.nonzero(mask)
  2282. if len(set(Y_vals.tolist()))<2 or len(set(X_vals.tolist()))<2:
  2283. return np.zeros(4, dtype=np.float32)
  2284. y1 = np.min(Y_vals)
  2285. x1 = np.min(X_vals)
  2286. y2 = np.max(Y_vals)
  2287. x2 = np.max(X_vals)
  2288. return np.array([x1, y1, x2, y2], dtype=np.float32)
  2289. def is_box_occluded(self, box1, box2, box_iou_threshold):
  2290. if np.any(np.abs(box1 - box2) > box_iou_threshold):
  2291. return True
  2292. return False
  2293. def large_scale_jitter(self, img, boxes, labels):
  2294. img_rescale_ratio = self.get_rescale_ratio(1)[0]
  2295. h, w, _ = img.shape
  2296. h_new, w_new = int(h * img_rescale_ratio), int(w * img_rescale_ratio)
  2297. img_rescaled = mmcv.imrescale(
  2298. img, img_rescale_ratio, return_scale=False)
  2299. boxes_rescaled = self.rescale_boxes(boxes, img_rescale_ratio)
  2300. # get random points for crop / pad
  2301. x, y = int(np.random.uniform(0, abs(w_new - w))), int(
  2302. np.random.uniform(0, abs(h_new - h)))
  2303. final_img, final_boxes, final_labels = None, [], labels
  2304. if img_rescale_ratio <= 1.0:
  2305. # pad
  2306. padding = (x, y, w - x - w_new, h - y - h_new)
  2307. final_img = mmcv.impad(
  2308. img_rescaled, padding=padding, pad_val=(100, 100, 100))
  2309. final_boxes = boxes_rescaled + np.array([x, y, x, y])
  2310. else:
  2311. # crop
  2312. final_img = img_rescaled[y:y + h, x:x + w, :]
  2313. offset = np.array([x, y, x, y], dtype=np.float32)
  2314. boxes_offset = boxes_rescaled - offset
  2315. boxes_offset[:, 0::2] = np.clip(boxes_offset[:, 0::2], 0, w)
  2316. boxes_offset[:, 1::2] = np.clip(boxes_offset[:, 1::2], 0, h)
  2317. valid_inds = (boxes_offset[:, 2] > boxes_offset[:, 0]) & (
  2318. boxes_offset[:, 3] > boxes_offset[:, 1])
  2319. final_boxes = boxes_offset[valid_inds, :]
  2320. return final_img, final_boxes, final_labels
  2321. def get_bitmapmasks(self, final_mask_list):
  2322. from mmdet.core import BitmapMasks
  2323. masks_ndarray = np.array(final_mask_list)
  2324. return BitmapMasks(masks_ndarray, masks_ndarray.shape[1],
  2325. masks_ndarray.shape[2])
  2326. def bbox_flip_horizontal(self, bboxes, img_shape):
  2327. """Flip bboxes horizontally.
  2328. Args:
  2329. bboxes (numpy.ndarray): Bounding boxes, shape (..., 4*k)
  2330. img_shape (tuple[int]): Image shape (height, width)
  2331. Returns:
  2332. numpy.ndarray: Flipped bounding boxes.
  2333. """
  2334. assert bboxes.shape[-1] % 4 == 0
  2335. flipped = bboxes.copy()
  2336. w = img_shape[1]
  2337. flipped[..., 0::4] = w - bboxes[..., 2::4]
  2338. flipped[..., 2::4] = w - bboxes[..., 0::4]
  2339. return flipped
  2340. def bbox_flip_vertically(self, bboxes, img_shape):
  2341. """Flip bboxes vertically.
  2342. Args:
  2343. bboxes (numpy.ndarray): Bounding boxes, shape (..., 4*k)
  2344. img_shape (tuple[int]): Image shape (height, width)
  2345. Returns:
  2346. numpy.ndarray: Flipped bounding boxes.
  2347. """
  2348. assert bboxes.shape[-1] % 4 == 0
  2349. flipped = bboxes.copy()
  2350. h = img_shape[0]
  2351. flipped[..., 1::4] = h - bboxes[..., 3::4]
  2352. flipped[..., 3::4] = h - bboxes[..., 1::4]
  2353. return flipped
  2354. def get_mask_from_box(self, img_shape, boxes):
  2355. """
  2356. img_shape: tuple(H,W)
  2357. boxes: box list
  2358. """
  2359. import cv2
  2360. H,W = img_shape
  2361. maskes = []
  2362. for x1,y1,x2,y2 in boxes:
  2363. mask = np.zeros((H,W), dtype=np.uint8)
  2364. box = np.array([[[x1,y1],[x2,y1],[x2,y2],[x1,y2]]], dtype=np.int32)
  2365. try:
  2366. mask = cv2.fillPoly(mask, box, 1)
  2367. except:
  2368. print(box.shape, box)
  2369. maskes.append(mask)
  2370. assert len(maskes) == len(boxes), 'len(maskes) != len(boxes)'
  2371. return maskes
  2372. def gen_bitmapmasks_from_box(self, img_shape, boxes):
  2373. mask = self.get_mask_from_box(img_shape, boxes)
  2374. return self.get_bitmapmasks(mask)
  2375. def __call__(self, results):
  2376. if random.uniform(0, 1) > self.prob:
  2377. return results
  2378. results_input = results.copy()
  2379. results_cpy = copy.deepcopy(results)
  2380. results_cpy2 = results_cpy['mix_results'][0]
  2381. img_dest = results_cpy['img']
  2382. labels_dest = results_cpy.get('gt_labels')
  2383. boxes_dest = results_cpy.get('gt_bboxes')
  2384. img_src = results_cpy2['img']
  2385. labels_src = results_cpy2.get('gt_labels')
  2386. boxes_src = results_cpy2.get('gt_bboxes')
  2387. if (not len(boxes_src)) or (not len(boxes_dest)):
  2388. return results
  2389. selected_idxs = self.random_bbox_indexes(boxes_src)
  2390. selected_boxes_src = np.array(
  2391. list(map(boxes_src.__getitem__, selected_idxs)))
  2392. selected_labels_src = np.array(
  2393. list(map(labels_src.__getitem__, selected_idxs)))
  2394. if np.random.random() < self.flip_prob:
  2395. img_src = mmcv.imflip(img_src, direction='vertical')
  2396. selected_boxes_src = self.bbox_flip_vertically(selected_boxes_src,
  2397. img_src.shape)
  2398. rescaled_img_dest, \
  2399. rescaled_boxes_dest, \
  2400. labels_dest = self.large_scale_jitter(img_dest,
  2401. boxes_dest,
  2402. labels_dest)
  2403. rescaled_img_src, \
  2404. rescaled_boxes_src, \
  2405. selected_labels_src = self.large_scale_jitter(img_src,
  2406. selected_boxes_src,
  2407. selected_labels_src)
  2408. if (not len(rescaled_boxes_src)) or (not len(rescaled_boxes_dest)):
  2409. return results
  2410. dest_h, dest_w, _ = rescaled_img_dest.shape
  2411. src_h, src_w, _ = rescaled_img_src.shape
  2412. rescaled_masks_src = self.gen_bitmapmasks_from_box((src_h, src_w), rescaled_boxes_src)
  2413. rescaled_masks_dest = self.gen_bitmapmasks_from_box((dest_h, dest_w), rescaled_boxes_dest)
  2414. pastable_img_src = mmcv.imresize(
  2415. rescaled_img_src, (dest_w, dest_h), return_scale=False)
  2416. pastable_masks_src = rescaled_masks_src.resize((dest_h, dest_w))
  2417. pastable_boxes_src = self.rescale_boxes(
  2418. rescaled_boxes_src, (dest_h / src_h, dest_w / src_w))
  2419. collated_pastable_mask = np.where(
  2420. np.any(pastable_masks_src.masks, axis=0), 1, 0)
  2421. final_dest_masks, final_dest_boxes, final_dest_labels = [], [], []
  2422. for mask_dest, box_dest, label_dest in zip(rescaled_masks_dest.masks,
  2423. rescaled_boxes_dest,
  2424. labels_dest):
  2425. updated_mask_dest = self.get_updated_masks(collated_pastable_mask,
  2426. mask_dest)
  2427. updated_box_dest = self.get_box_from_mask(updated_mask_dest)
  2428. if self.is_box_occluded(updated_box_dest, box_dest, self.box_occlusion_thresh):
  2429. if np.sum(updated_mask_dest) <= self.occluded_area_thresh:
  2430. continue
  2431. final_dest_boxes.append(updated_box_dest)
  2432. final_dest_masks.append(updated_mask_dest)
  2433. final_dest_labels.append(label_dest)
  2434. for src_mask, src_box, src_label in zip(pastable_masks_src.masks,
  2435. pastable_boxes_src,
  2436. selected_labels_src):
  2437. src_mask_3channel = np.tile(src_mask[:, :, None], (1, 1, 3)) * 255
  2438. final_pastable_mask_cutout = cv2.bitwise_and(
  2439. pastable_img_src, src_mask_3channel)
  2440. rescaled_img_dest = cv2.subtract(rescaled_img_dest,
  2441. src_mask_3channel)
  2442. rescaled_img_dest = cv2.add(rescaled_img_dest,
  2443. final_pastable_mask_cutout)
  2444. final_dest_boxes.append(src_box)
  2445. final_dest_masks.append(src_mask)
  2446. final_dest_labels.append(src_label)
  2447. if not final_dest_masks:
  2448. return results
  2449. final_dest_masks_ = self.get_bitmapmasks(final_dest_masks)
  2450. results['img'] = rescaled_img_dest
  2451. results['img_shape'] = rescaled_img_dest.shape
  2452. results['ori_shape'] = rescaled_img_dest.shape
  2453. # results['gt_masks'] = final_dest_masks_
  2454. results['gt_bboxes'] = np.array(final_dest_boxes, dtype=np.float32)
  2455. results['gt_labels'] = np.array(final_dest_labels)
  2456. return results
  2457. def __repr__(self):
  2458. repr_str = self.__class__.__name__
  2459. repr_str += f'(max_paste_objects={self.max_paste_objects}, '
  2460. repr_str += f'prob={self.prob}, '
  2461. repr_str += f'resize_scale={self.resize_scale}, '
  2462. repr_str += f'occluded_area_thresh={self.occluded_area_thresh}, '
  2463. repr_str += f'box_occlusion_thresh={self.box_occlusion_thresh}, '
  2464. return repr_str

No Description

Contributors (2)