You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

tensorflow_vision.py 49 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import tensorflow as tf
  4. import numpy as np
  5. from tensorflow.python.ops import math_ops
  6. from tensorflow.python.ops import array_ops, random_ops
  7. from tensorflow.python.framework import ops
  8. from tensorflow.python.ops.image_ops_impl import _AssertAtLeast3DImage
  9. from tensorflow.python.framework import dtypes
  10. from tensorflow.python.ops.image_ops_impl import convert_image_dtype
  11. import numbers
  12. import PIL
  13. from PIL import Image
  14. import math
  15. import scipy
  16. from scipy import ndimage
  17. __all__ = [
  18. 'central_crop',
  19. 'to_tensor',
  20. 'crop',
  21. 'pad',
  22. 'resize',
  23. 'transpose',
  24. 'hwc_to_chw',
  25. 'chw_to_hwc',
  26. 'rgb_to_hsv',
  27. 'hsv_to_rgb',
  28. 'rgb_to_gray',
  29. 'adjust_brightness',
  30. 'adjust_contrast',
  31. 'adjust_hue',
  32. 'adjust_saturation',
  33. 'normalize',
  34. 'hflip',
  35. 'vflip',
  36. 'padtoboundingbox',
  37. 'standardize',
  38. 'random_brightness',
  39. 'random_contrast',
  40. 'random_saturation',
  41. 'random_hue',
  42. 'random_crop',
  43. 'random_resized_crop',
  44. 'random_vflip',
  45. 'random_hflip',
  46. 'random_rotation',
  47. 'random_shear',
  48. 'random_shift',
  49. 'random_zoom',
  50. 'random_affine',
  51. ]
  52. def _is_pil_image(image):
  53. return isinstance(image, Image.Image)
  54. def _is_numpy_image(image):
  55. return isinstance(image, np.ndarray) and (image.ndim in {2, 3})
  56. def _get_image_size(image):
  57. image_shape = image.get_shape()
  58. if image_shape.ndims == 3:
  59. height, width, channels = image_shape
  60. return height, width
  61. elif image_shape.ndims == 4:
  62. batch, height, width, channels = image_shape
  63. return height, width
  64. def random_factor(factor, name, center=1, bound=(0, float('inf')), non_negative=True):
  65. if isinstance(factor, numbers.Number):
  66. if factor < 0:
  67. raise ValueError('The input value of {} cannot be negative.'.format(name))
  68. factor = [center - factor, center + factor]
  69. if non_negative:
  70. factor[0] = max(0, factor[0])
  71. elif isinstance(factor, (tuple, list)) and len(factor) == 2:
  72. if not bound[0] <= factor[0] <= factor[1] <= bound[1]:
  73. raise ValueError(
  74. "Please check your value range of {} is valid and "
  75. "within the bound {}.".format(name, bound)
  76. )
  77. else:
  78. raise TypeError("Input of {} should be either a single value, or a list/tuple of " "length 2.".format(name))
  79. factor = np.random.uniform(factor[0], factor[1])
  80. return factor
def central_crop(image, size=None, central_fraction=None):
    '''Crop the central region of an image, by explicit size or by fraction.

    Exactly one of `size` / `central_fraction` should be given; when both are
    given, `size` takes precedence.

    Parameters
    ----------
    image :
        Either a 3-D float Tensor of shape [height, width, depth],
        or a 4-D Tensor of shape [batch_size, height, width, depth].
    central_fraction :
        float in (0, 1], fraction of size to crop (delegated to
        tf.image.central_crop).
    size:
        The output size of the cropped image. If size is an integer, a square
        crop of size (size, size) is returned. If size is a sequence of
        length 2, it should be (height, width).
    Returns :
        3-D / 4-D float Tensor, as per the input.
    -------
    '''
    if size is None and central_fraction is None:
        raise ValueError('central_fraction and size can not be both None')
    if size is not None:
        if not isinstance(size, (int, list, tuple)) or (isinstance(size, (list, tuple)) and len(size) != 2):
            raise ValueError(
                "Size should be a single integer or a list/tuple (h, w) of length 2.But"
                "got {}.".format(type(size))
            )
        if isinstance(size, int):
            target_height = size
            target_width = size
        else:
            target_height = size[0]
            target_width = size[1]
        image = ops.convert_to_tensor(image, name='image')
        rank = image.get_shape().ndims
        if rank != 3 and rank != 4:
            raise ValueError(
                '`image` should either be a Tensor with rank = 3 or '
                'rank = 4. Had rank = {}.'.format(rank)
            )

        # Return the statically-known dimension when available, otherwise a
        # dynamic shape op; the bool flag records which case we are in.
        def _get_dim(tensor, idx):
            static_shape = tensor.get_shape().dims[idx].value
            if static_shape is not None:
                return static_shape, False
            return array_ops.shape(tensor)[idx], True

        if rank == 3:
            img_h, dynamic_h = _get_dim(image, 0)
            img_w, dynamic_w = _get_dim(image, 1)
            img_d = image.get_shape()[2]
        else:
            img_bs = image.get_shape()[0]
            img_h, dynamic_h = _get_dim(image, 1)
            img_w, dynamic_w = _get_dim(image, 2)
            img_d = image.get_shape()[3]
        bbox_h_size = target_height
        bbox_w_size = target_width
        # Compute the top-left corner of the centered crop box.  Dynamic dims
        # need graph ops; static dims can be computed in plain Python.
        if dynamic_h:
            img_hd = math_ops.cast(img_h, dtypes.float64)
            target_height = math_ops.cast(target_height, dtypes.float64)
            bbox_h_start = math_ops.cast((img_hd - target_height) / 2, dtypes.int32)
        else:
            img_hd = float(img_h)
            target_height = float(target_height)
            bbox_h_start = int((img_hd - target_height) / 2)
        if dynamic_w:
            img_wd = math_ops.cast(img_w, dtypes.float64)
            target_width = math_ops.cast(target_width, dtypes.float64)
            bbox_w_start = math_ops.cast((img_wd - target_width) / 2, dtypes.int32)
        else:
            img_wd = float(img_w)
            target_width = float(target_width)
            bbox_w_start = int((img_wd - target_width) / 2)
        # Slice out the crop; -1 keeps the full extent of that axis.
        if rank == 3:
            bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
            bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
        else:
            bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
            bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1])
        image = array_ops.slice(image, bbox_begin, bbox_size)
        # Restore as much static shape information as we have.
        if rank == 3:
            image.set_shape([None if dynamic_h else bbox_h_size, None if dynamic_w else bbox_w_size, img_d])
        else:
            image.set_shape([img_bs, None if dynamic_h else bbox_h_size, None if dynamic_w else bbox_w_size, img_d])
        return image
    elif central_fraction is not None:
        return tf.image.central_crop(image, central_fraction)
  164. def to_tensor(img, data_format):
  165. '''Converts a ``image`` to tf.Tensor.
  166. Parameters
  167. ----------
  168. img:
  169. Image to be converted to tensor.
  170. data_format:
  171. Data format of output tensor, should be 'HWC' or
  172. 'CHW'. Default: 'HWC'.
  173. Returns:
  174. Tensor: Converted image.
  175. -------
  176. '''
  177. if not (_is_pil_image(img) or _is_numpy_image(img)):
  178. raise TypeError('img should be PIL Image or ndarray. But got {}'.format(type(img)))
  179. if _is_pil_image(img):
  180. # PIL Image
  181. if img.mode == 'I':
  182. image = tf.convert_to_tensor(np.array(img, np.int32, copy=False))
  183. elif img.mode == 'I;16':
  184. # cast and reshape not support int16
  185. image = tf.convert_to_tensor(np.array(img, np.int32, copy=False))
  186. elif img.mode == 'F':
  187. image = tf.convert_to_tensor(np.array(img, np.float32, copy=False))
  188. elif img.mode == '1':
  189. image = 255 * tf.convert_to_tensor(np.array(img, np.uint8, copy=False))
  190. else:
  191. image = tf.convert_to_tensor(np.array(img, copy=False))
  192. if img.mode == 'YCbCr':
  193. nchannel = 3
  194. elif img.mode == 'I;16':
  195. nchannel = 1
  196. else:
  197. nchannel = len(img.mode)
  198. dtype = image.dtype
  199. if dtype == 'tf.uint8':
  200. image = tf.cast(image, tf.float32) / 255.
  201. image = tf.reshape(image, shape=[img.size[1], img.size[0], nchannel])
  202. if data_format == 'CHW':
  203. image = tf.transpose(image, perm=[2, 0, 1])
  204. return image
  205. else:
  206. if img.ndim == 2:
  207. img = img[:, :, None]
  208. if data_format == 'CHW':
  209. img = tf.convert_to_tensor(img.transpose((2, 0, 1)))
  210. else:
  211. img = tf.convert_to_tensor(img)
  212. dtype = img.dtype
  213. if dtype == 'tf.uint8':
  214. img = tf.cast(img, tf.float32) / 255.
  215. return img
  216. def crop(image, offset_height, offset_width, target_height, target_width):
  217. return tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width)
  218. def pad(image, padding, padding_value, mode):
  219. '''
  220. Parameters
  221. ----------
  222. image:
  223. A 3-D or 4-D Tensor.
  224. padding:
  225. An integer or a list/tuple. If a single number is provided, pad all borders with this value.
  226. If a tuple or list of 2 values is provided, pad the left and right with the first value and the top and bottom with the second value.
  227. If 4 values are provided as a list or tuple, pad the (left , top, right, bottom) respectively.
  228. padding_value:
  229. In "CONSTANT" mode, the scalar pad value to use. Must be same type as tensor.
  230. mode:
  231. One of "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive)
  232. Returns:
  233. A padded Tensor. Has the same type as tensor.
  234. -------
  235. '''
  236. image = ops.convert_to_tensor(image, name='image')
  237. image_shape = image.get_shape()
  238. if len(image_shape) == 3:
  239. batch_size = 0
  240. elif len(image_shape) == 4:
  241. batch_size = image_shape[0]
  242. else:
  243. raise TypeError('Image must be a 3-D tensor or 4-D tensor.')
  244. if isinstance(padding, int):
  245. padding = ((padding, padding), (padding, padding))
  246. elif isinstance(padding, list) or isinstance(padding, tuple):
  247. if len(padding) == 2:
  248. padding = ((padding[1], padding[1]), (padding[0], padding[0]))
  249. elif len(padding) == 4:
  250. padding = ((padding[1], padding[3]), (padding[0], padding[2]))
  251. else:
  252. raise ValueError('The length of padding should be 2 or 4, but got {}.'.format(len(padding)))
  253. else:
  254. raise TypeError('Padding should be an integer or a list/tuple, but got {}.'.format(type(padding)))
  255. if batch_size == 0:
  256. padding = (padding[0], padding[1], (0, 0))
  257. else:
  258. padding = ((0, 0), padding[0], padding[1], (0, 0))
  259. return tf.pad(image, padding, mode=mode, constant_values=padding_value)
  260. def resize(image, size, method):
  261. '''
  262. Parameters
  263. ----------
  264. images:
  265. Input images to resize
  266. size:
  267. The output size of the resized image.
  268. If size is an integer, smaller edge of the image will be resized to this value with
  269. the same image aspect ratio.
  270. If size is a sequence of (height, width), this will be the desired output size.
  271. method:
  272. An image.ResizeMethod, or string equivalent shoulid be in
  273. (bilinear, lanczos3, lanczos5, bicubic, gaussian, nearest, area, mitchellcubic).
  274. Defaults to bilinear.
  275. preserve_aspect_ratio:
  276. Whether to preserve the aspect ratio.
  277. Returns:
  278. resized images
  279. -------
  280. '''
  281. if not (isinstance(size, int) or (isinstance(size, (list, tuple)) and len(size) == 2)):
  282. raise TypeError('Size should be a single number or a list/tuple (h, w) of length 2.' 'Got {}.'.format(size))
  283. image = ops.convert_to_tensor(image)
  284. orig_dtype = image.dtype
  285. if orig_dtype not in [dtypes.float16, dtypes.float32]:
  286. image = convert_image_dtype(image, dtypes.float32)
  287. if image.get_shape().ndims == 3:
  288. h, w, _ = image.get_shape().as_list()
  289. elif image.get_shape().ndims == 4:
  290. _, h, w, _ = image.get_shape().as_list()
  291. if isinstance(size, int):
  292. if (w <= h and w == size) or (h <= w and h == size):
  293. size = (h, w)
  294. if w < h:
  295. target_w = size
  296. target_h = int(size * h / w)
  297. size = (target_h, target_w)
  298. else:
  299. target_h = size
  300. target_w = int(size * w / h)
  301. size = (target_h, target_w)
  302. image = tf.image.resize(image, size, method, preserve_aspect_ratio=False)
  303. return convert_image_dtype(image, orig_dtype, saturate=True)
  304. def transpose(image, order):
  305. image = ops.convert_to_tensor(image)
  306. shape = image.get_shape()
  307. if shape.ndims == 3 or shape.ndims is None:
  308. if len(order) != 3:
  309. raise ValueError('if image is 3-D tensor, order should be a list/tuple with length of 3')
  310. return array_ops.transpose(image, order)
  311. elif shape.ndims == 4:
  312. if len(order) != 4:
  313. raise ValueError('if image is 4-D tensor, order should be a list/tuple with length of 4')
  314. return array_ops.transpose(image, order)
  315. else:
  316. raise ValueError('\'image\' must have either 3 or 4 dimensions.')
  317. def hwc_to_chw(image):
  318. if (len(image.shape) == 3):
  319. return transpose(image, (2, 0, 1))
  320. elif (len(image.shape) == 4):
  321. return transpose(image, (0, 3, 1, 2))
  322. else:
  323. raise ValueError('\'image\' must have either 3 or 4 dimensions.')
  324. def chw_to_hwc(image):
  325. if (len(image.shape) == 3):
  326. return transpose(image, (1, 2, 0))
  327. elif (len(image.shape) == 4):
  328. return transpose(image, (0, 2, 3, 1))
  329. else:
  330. raise ValueError('\'image\' must have either 3 or 4 dimensions.')
  331. def rgb_to_hsv(image):
  332. return tf.image.rgb_to_hsv(image)
  333. def hsv_to_rgb(image):
  334. return tf.image.hsv_to_rgb(image)
  335. def rgb_to_gray(image, num_output_channels):
  336. if num_output_channels not in (1, 3):
  337. raise ValueError('num_output_channels should be either 1 or 3')
  338. image = ops.convert_to_tensor(image, name='image')
  339. orig_dtype = image.dtype
  340. flt_image = convert_image_dtype(image, dtypes.float32)
  341. rgb_weights = [0.2989, 0.5870, 0.1140]
  342. gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
  343. gray_float = array_ops.expand_dims(gray_float, -1)
  344. if num_output_channels == 3:
  345. gray_float = array_ops.stack([gray_float, gray_float, gray_float], axis=2)
  346. return convert_image_dtype(gray_float, orig_dtype)
  347. def adjust_brightness(image, brightness_factor):
  348. '''
  349. Parameters
  350. ----------
  351. images:
  352. Input images to adjust brightness
  353. brightness_factor(float): How much to adjust the brightness. Can be
  354. any non negative number. 0 gives a black image, 1 gives the
  355. original image while 2 increases the brightness by a factor of 2.
  356. Returns:
  357. adjusted images
  358. -------
  359. '''
  360. if brightness_factor < 0:
  361. raise ValueError('brightness_factor ({}) is not non-negative.'.format(brightness_factor))
  362. image = ops.convert_to_tensor(image, name='image')
  363. image = _AssertAtLeast3DImage(image)
  364. orig_dtype = image.dtype
  365. if orig_dtype not in [dtypes.float16, dtypes.float32]:
  366. image = convert_image_dtype(image, dtypes.float32)
  367. brightness_factor = math_ops.cast(brightness_factor, image.dtype)
  368. image_zeros = tf.zeros_like(image)
  369. adjusted = brightness_factor * image + (1.0 - brightness_factor) * image_zeros
  370. adjusted = tf.clip_by_value(adjusted, clip_value_min=0, clip_value_max=1.0)
  371. return convert_image_dtype(adjusted, orig_dtype, saturate=True)
  372. def adjust_contrast(image, contrast_factor):
  373. '''
  374. Parameters
  375. ----------
  376. images:
  377. Input images to adjust contrast
  378. contrast_factor(float): How much to adjust the contrast. Can be
  379. any non negative number. 0 gives a gray image, 1 gives the
  380. original image while 2 increases the contrast by a factor of 2.
  381. Returns:
  382. adjusted images
  383. -------
  384. '''
  385. if contrast_factor < 0:
  386. raise ValueError('contrast_factor ({}) is not non-negative.'.format(contrast_factor))
  387. image = ops.convert_to_tensor(image, name='image')
  388. image = _AssertAtLeast3DImage(image)
  389. orig_dtype = image.dtype
  390. if orig_dtype not in [dtypes.float16, dtypes.float32]:
  391. image = convert_image_dtype(image, dtypes.float32)
  392. contrast_factor = math_ops.cast(contrast_factor, image.dtype)
  393. mean = tf.math.reduce_mean(tf.image.rgb_to_grayscale(image), keepdims=True)
  394. adjusted = contrast_factor * image + (1 - contrast_factor) * mean
  395. adjusted = tf.clip_by_value(adjusted, clip_value_min=0, clip_value_max=1.0)
  396. return convert_image_dtype(adjusted, orig_dtype, saturate=True)
  397. def adjust_hue(image, hue_factor):
  398. '''
  399. Parameters
  400. ----------
  401. images(Tensor):
  402. Input images to adjust hue
  403. hue_factor(float): How much to shift the hue channel. Should be in
  404. [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
  405. HSV space in positive and negative direction respectively.
  406. 0 means no shift. Therefore, both -0.5 and 0.5 will give an image
  407. with complementary colors while 0 gives the original image.
  408. Returns(Tensor):
  409. Adjusted images
  410. -------
  411. '''
  412. if not (-0.5 <= hue_factor <= 0.5):
  413. raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
  414. image = ops.convert_to_tensor(image, name='image')
  415. image = _AssertAtLeast3DImage(image)
  416. orig_dtype = image.dtype
  417. if orig_dtype not in [dtypes.float16, dtypes.float32]:
  418. image = convert_image_dtype(image, dtypes.float32)
  419. hue_factor = math_ops.cast(hue_factor, image.dtype)
  420. image = tf.image.rgb_to_hsv(image)
  421. h, s, v = tf.split(image, num_or_size_splits=[1, 1, 1], axis=2)
  422. h = (h + hue_factor) % 1.0
  423. image = tf.concat((h, s, v), axis=2)
  424. adjusted = tf.image.hsv_to_rgb(image)
  425. return convert_image_dtype(adjusted, orig_dtype, saturate=True)
  426. def adjust_saturation(image, saturation_factor):
  427. '''
  428. Parameters
  429. ----------
  430. images(Tensor):
  431. Input images to adjust saturation
  432. contrast_factor(float): How much to adjust the saturation. 0 will
  433. give a black and white image, 1 will give the original image while
  434. 2 will enhance the saturation by a factor of 2.
  435. Returns(Tensor):
  436. Adjusted images
  437. -------
  438. '''
  439. if saturation_factor < 0:
  440. raise ValueError('saturation_factor ({}) is not non-negative.'.format(saturation_factor))
  441. image = ops.convert_to_tensor(image, name='image')
  442. image = _AssertAtLeast3DImage(image)
  443. orig_dtype = image.dtype
  444. if orig_dtype not in [dtypes.float16, dtypes.float32]:
  445. image = convert_image_dtype(image, dtypes.float32)
  446. saturation_factor = math_ops.cast(saturation_factor, image.dtype)
  447. gray_image = tf.image.rgb_to_grayscale(image)
  448. adjusted = saturation_factor * image + (1 - saturation_factor) * gray_image
  449. adjusted = tf.clip_by_value(adjusted, clip_value_min=0, clip_value_max=1.0)
  450. return convert_image_dtype(adjusted, orig_dtype, saturate=True)
  451. def hflip(image):
  452. '''
  453. Parameters
  454. ----------
  455. image(Tensor):
  456. Input images to flip an image horizontally (left to right)
  457. Returns(Tensor):
  458. Flipped images
  459. -------
  460. '''
  461. return tf.image.flip_left_right(image)
  462. def vflip(image):
  463. '''
  464. Parameters
  465. ----------
  466. image(Tensor):
  467. Input images to flip an image vertically (up to down)
  468. Returns(Tensor):
  469. Flipped images
  470. -------
  471. '''
  472. return tf.image.flip_up_down(image)
  473. def padtoboundingbox(image, offset_height, offset_width, target_height, target_width, padding_value):
  474. '''
  475. Parameters
  476. ----------
  477. image:
  478. 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
  479. of shape `[height, width, channels]`.
  480. offset_height:
  481. Number of rows of padding_values to add on top.
  482. offset_width:
  483. Number of columns of padding_values to add on the left.
  484. target_height:
  485. Height of output image.
  486. target_width:
  487. Width of output image.
  488. padding_value:
  489. value to pad
  490. Returns:
  491. If `image` was 4-D, a 4-D float Tensor of shape
  492. `[batch, target_height, target_width, channels]`
  493. If `image` was 3-D, a 3-D float Tensor of shape
  494. `[target_height, target_width, channels]`
  495. -------
  496. '''
  497. image = ops.convert_to_tensor(image, name='image')
  498. if offset_height < 0:
  499. raise ValueError('offset_height must be >= 0')
  500. if offset_width < 0:
  501. raise ValueError('offset_width must be >= 0')
  502. image_shape = image.get_shape()
  503. if image_shape.ndims == 3:
  504. height, width, channels = image.get_shape()
  505. elif image_shape.ndims == 4:
  506. batch, height, width, channels = image.get_shape()
  507. else:
  508. raise ValueError('\'image\' (shape %s) must have either 3 or 4 dimensions.' % image_shape)
  509. after_padding_width = target_width - offset_width - width
  510. after_padding_height = target_height - offset_height - height
  511. if after_padding_height < 0:
  512. raise ValueError('image height must be <= target - offset')
  513. if after_padding_width < 0:
  514. raise ValueError('image width must be <= target - offset')
  515. return pad(
  516. image, padding=(offset_width, offset_height, after_padding_width, after_padding_height),
  517. padding_value=padding_value, mode='constant'
  518. )
  519. def normalize(image, mean, std, data_format):
  520. '''
  521. Parameters
  522. ----------
  523. image:
  524. An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image.
  525. mean:
  526. List or tuple of mean values for each channel, with respect to channel order.
  527. std:
  528. List or tuple of standard deviations for each channel.
  529. channel_mode:
  530. Decide to implement standardization on whole image or each channel of image.
  531. Returns:
  532. A Tensor with the same shape and dtype as image.
  533. -------
  534. '''
  535. image = ops.convert_to_tensor(image, name='image')
  536. image = math_ops.cast(image, dtype=tf.float32)
  537. image = _AssertAtLeast3DImage(image)
  538. if data_format == 'CHW':
  539. num_channels = image.shape[0]
  540. elif data_format == 'HWC':
  541. num_channels = image.shape[2]
  542. if isinstance(mean, numbers.Number):
  543. mean = (mean, ) * num_channels
  544. elif isinstance(mean, (list, tuple)):
  545. if len(mean) != num_channels:
  546. raise ValueError("Length of mean must be 1 or equal to the number of channels({0}).".format(num_channels))
  547. if isinstance(std, numbers.Number):
  548. std = (std, ) * num_channels
  549. elif isinstance(std, (list, tuple)):
  550. if len(std) != num_channels:
  551. raise ValueError("Length of std must be 1 or equal to the number of channels({0}).".format(num_channels))
  552. if data_format == 'CHW':
  553. std = np.float32(np.array(std).reshape((-1, 1, 1)))
  554. mean = np.float32(np.array(mean).reshape((-1, 1, 1)))
  555. elif data_format == 'HWC':
  556. mean = np.float32(np.array(mean).reshape((1, 1, -1)))
  557. std = np.float32(np.array(std).reshape((1, 1, -1)))
  558. mean = ops.convert_to_tensor(mean)
  559. mean = math_ops.cast(mean, dtype=tf.float32)
  560. std = ops.convert_to_tensor(std)
  561. std = math_ops.cast(std, dtype=tf.float32)
  562. image -= mean
  563. image = math_ops.divide(image, std)
  564. return image
  565. def standardize(image):
  566. '''
  567. Reference to tf.image.per_image_standardization().
  568. Linearly scales each image in image to have mean 0 and variance 1.
  569. Parameters
  570. ----------
  571. image:
  572. An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image.
  573. Returns:
  574. A Tensor with the same shape as image and its dtype is float32.
  575. -------
  576. '''
  577. image = ops.convert_to_tensor(image, name='image')
  578. image = math_ops.cast(image, dtype=tf.float32)
  579. return tf.image.per_image_standardization(image)
  580. def random_brightness(image, brightness_factor):
  581. '''
  582. Perform a random brightness on the input image.
  583. Parameters
  584. ----------
  585. image:
  586. Input images to adjust random brightness
  587. brightness_factor:
  588. Brightness adjustment factor (default=(1, 1)). Cannot be negative.
  589. If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness].
  590. If it is a sequence, it should be [min, max] for the range.
  591. Returns:
  592. Adjusted image.
  593. -------
  594. '''
  595. brightness_factor = random_factor(brightness_factor, name='brightness')
  596. return adjust_brightness(image, brightness_factor)
  597. def random_contrast(image, contrast_factor):
  598. '''
  599. Perform a random contrast on the input image.
  600. Parameters
  601. ----------
  602. image:
  603. Input images to adjust random contrast
  604. contrast_factor:
  605. Contrast adjustment factor (default=(1, 1)). Cannot be negative.
  606. If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast].
  607. If it is a sequence, it should be [min, max] for the range.
  608. Returns:
  609. Adjusted image.
  610. -------
  611. '''
  612. contrast_factor = random_factor(contrast_factor, name='contrast')
  613. return adjust_contrast(image, contrast_factor)
  614. def random_saturation(image, saturation_factor):
  615. '''
  616. Perform a random saturation on the input image.
  617. Parameters
  618. ----------
  619. image:
  620. Input images to adjust random saturation
  621. saturation_factor:
  622. Saturation adjustment factor (default=(1, 1)). Cannot be negative.
  623. If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation].
  624. If it is a sequence, it should be [min, max] for the range.
  625. Returns:
  626. Adjusted image.
  627. -------
  628. '''
  629. saturation_factor = random_factor(saturation_factor, name='saturation')
  630. return adjust_saturation(image, saturation_factor)
  631. def random_hue(image, hue_factor):
  632. '''
  633. Perform a random contrast on the input image.
  634. Parameters
  635. ----------
  636. image:
  637. Input images to adjust random contrast
  638. brightness_factor:
  639. Contrast adjustment factor (default=(1, 1)). Cannot be negative.
  640. If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast].
  641. If it is a sequence, it should be [min, max] for the range.
  642. Returns:
  643. Adjusted image.
  644. -------
  645. '''
  646. hue_factor = random_factor(hue_factor, name='hue', center=0, bound=(-0.5, 0.5), non_negative=False)
  647. return adjust_hue(image, hue_factor)
def random_crop(image, size, padding, pad_if_needed, fill, padding_mode):
    '''
    Crop the given image at a random location, optionally padding it first.

    Parameters
    ----------
    image:
        Input images to crop and pad if needed.
    size:
        Desired output size of the crop. If size is an int instead of sequence like (h, w),
        a square crop (size, size) is made. If provided a sequence of length 1,
        it will be interpreted as (size[0], size[0]).
    padding:
        Optional, padding on each border of the image. Default is None.
        If a single int is provided this is used to pad all borders.
        If sequence of length 2 is provided this is the padding on left/right and top/bottom respectively.
        If a sequence of length 4 is provided this is the padding for the left, top, right and bottom borders respectively.
    pad_if_needed:
        It will pad the image if smaller than the desired size to avoid raising an exception.
        Since cropping is done after padding, the padding seems to be done at a random offset.
    fill:
        Pixel fill value for constant fill. Default is 0.
    padding_mode:
        Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.

    Returns
    -------
        cropped images.
    '''
    image = ops.convert_to_tensor(image, name='image')
    _AssertAtLeast3DImage(image)
    # Normalize `size` to an (h, w) pair before converting it to a tensor.
    if isinstance(size, int):
        size = (size, size)
    elif isinstance(size, (tuple, list)) and len(size) == 2:
        size = size
    else:
        raise ValueError('Size should be a int or a list/tuple with length of 2. ' 'But got {}'.format(size))
    size = ops.convert_to_tensor(size, dtype=dtypes.int32, name='size')
    # Apply the caller-supplied padding before measuring the image.
    if padding is not None:
        image = pad(image, padding, fill, padding_mode)
    # Read the static shape; 3-D is a single image, 4-D is a batch.
    # NOTE(review): no branch handles other ranks, so height/width would be
    # unbound for them — _AssertAtLeast3DImage presumably rules that out.
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
        height, width, channels = image_shape
    elif image_shape.ndims == 4:
        batch, height, width, channels = image_shape
    # Pad up to the target size when the image is too small. The 2-tuples
    # follow the (left/right, top/bottom) convention documented above, so
    # (0, dh) pads only the vertical axis and (dw, 0) only the horizontal one.
    if pad_if_needed and height < size[0]:
        image = pad(image, (0, size[0] - height), fill, padding_mode)
    if pad_if_needed and width < size[1]:
        image = pad(image, (size[1] - width, 0), fill, padding_mode)
    # Re-read the (possibly padded) static shape.
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
        height, width, channels = image_shape
    elif image_shape.ndims == 4:
        batch, height, width, channels = image_shape
    target_height, target_width = size
    if height < target_height or width < target_width:
        raise ValueError(
            'Crop size {} should be smaller than input image size {}. '.format(
                (target_height, target_width), (height, width)
            )
        )
    # Nothing to randomize when the crop covers the whole image.
    if target_height == height and target_width == width:
        return crop(image, 0, 0, target_height, target_width)
    # Uniformly sample the top-left corner of the crop window
    # (maxval is exclusive, hence the +1).
    offset_height = random_ops.random_uniform([], minval=0, maxval=height - target_height + 1, dtype=size.dtype)
    offset_width = random_ops.random_uniform([], minval=0, maxval=width - target_width + 1, dtype=size.dtype)
    return crop(image, offset_height, offset_width, target_height, target_width)
  711. def random_resized_crop(image, size, scale, ratio, interpolation):
  712. '''Crop the given image to random size and aspect ratio.
  713. Parameters
  714. ----------
  715. image:
  716. 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
  717. size:
  718. Target size of output image, with (height, width) shape. if size is int, target size will be (size, size).
  719. scale:
  720. Range of size of the origin size cropped. Default: (0.08, 1.0)
  721. ratio:
  722. Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
  723. interpolation:
  724. Interpolation method. Default: 'bilinear'.
  725. Returns:
  726. Randomly cropped and resized image.
  727. -------
  728. '''
  729. if isinstance(size, int):
  730. size = (size, size)
  731. elif isinstance(size, (list, tuple)) and len(size) == 2:
  732. size = size
  733. else:
  734. raise TypeError('Size should be a int or a list/tuple with length of 2.' 'But got {}.'.format(size))
  735. if not (isinstance(scale, (list, tuple)) and len(scale) == 2):
  736. raise TypeError('Scale should be a list/tuple with length of 2.' 'But got {}.'.format(scale))
  737. if not (isinstance(ratio, (list, tuple)) and len(ratio) == 2):
  738. raise TypeError('Scale should be a list/tuple with length of 2.' 'But got {}.'.format(ratio))
  739. if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
  740. raise ValueError("Scale and ratio should be of kind (min, max)")
  741. image = ops.convert_to_tensor(image, name='image')
  742. image = _AssertAtLeast3DImage(image)
  743. def get_param(image, scale, ratio):
  744. height, width = _get_image_size(image)
  745. area = math_ops.cast(height * width, dtype=dtypes.float32)
  746. ratio = ops.convert_to_tensor(ratio, dtype=dtypes.float32)
  747. log_ratio = math_ops.log(ratio)
  748. for _ in range(10):
  749. target_area = area * random_ops.random_uniform([], minval=scale[0], maxval=scale[1], dtype=dtypes.float32)
  750. aspect_ratio = math_ops.exp(
  751. random_ops.random_uniform([], minval=log_ratio[0], maxval=log_ratio[1], dtype=dtypes.float32)
  752. )
  753. target_width = math_ops.to_int32(math_ops.round(math_ops.sqrt(target_area * aspect_ratio)))
  754. target_height = math_ops.to_int32(math_ops.round(math_ops.sqrt(target_area / aspect_ratio)))
  755. if 0 < target_width <= width and 0 < target_height <= height:
  756. offset_height = random_ops.random_uniform(
  757. [], minval=0, maxval=height - target_height + 1, dtype=dtypes.int32
  758. )
  759. offset_width = random_ops.random_uniform(
  760. [], minval=0, maxval=width - target_width + 1, dtype=dtypes.int32
  761. )
  762. return offset_height, offset_width, target_height, target_width
  763. height = ops.convert_to_tensor(height, dtype=dtypes.float32)
  764. width = ops.convert_to_tensor(width, dtype=dtypes.float32)
  765. in_ratio = width / height
  766. if in_ratio < ratio[0]:
  767. target_width = width
  768. target_height = math_ops.to_int32(math_ops.round(target_width / ratio[0]))
  769. elif in_ratio > ratio[1]:
  770. target_height = height
  771. target_width = math_ops.to_int32(math_ops.round(target_height / ratio[1]))
  772. else:
  773. target_height = height
  774. target_width = width
  775. offset_height = (height - target_height) // 2
  776. offset_width = (width - target_width) // 2
  777. return offset_height, offset_width, target_height, target_width
  778. offset_height, offset_width, target_heigth, target_width = get_param(image, scale, ratio)
  779. image = crop(image, offset_height, offset_width, target_heigth, target_width)
  780. image = resize(image, size, interpolation)
  781. return image
  782. def random_vflip(image, prob):
  783. '''Vertically flip the input image randomly with a given probability.
  784. Parameters
  785. ----------
  786. image:
  787. 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
  788. prob:
  789. probability of the image being flipped. Default value is 0.5
  790. Returns:
  791. A tensor of the same type and shape as image.
  792. -------
  793. '''
  794. image = ops.convert_to_tensor(image, name='image')
  795. image = _AssertAtLeast3DImage(image)
  796. random_prob = random_ops.random_uniform([], minval=0, maxval=1.0, dtype=dtypes.float32)
  797. flip_flag = math_ops.less(random_prob, prob)
  798. if flip_flag:
  799. return vflip(image)
  800. return image
  801. def random_hflip(image, prob):
  802. '''horizontally flip the input image randomly with a given probability.
  803. Parameters
  804. ----------
  805. image:
  806. 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
  807. prob:
  808. probability of the image being flipped. Default value is 0.5
  809. Returns:
  810. A tensor of the same type and shape as image.
  811. -------
  812. '''
  813. image = ops.convert_to_tensor(image, name='image')
  814. image = _AssertAtLeast3DImage(image)
  815. random_prob = random_ops.random_uniform([], minval=0, maxval=1.0, dtype=dtypes.float32)
  816. flip_flag = math_ops.less(random_prob, prob)
  817. if flip_flag:
  818. return hflip(image)
  819. return image
  820. def random_rotation(image, degrees, interpolation, expand, center, fill):
  821. '''Rotate the image by angle.
  822. Parameters
  823. ----------
  824. image:
  825. Input tensor. Must be 3D.
  826. degrees:
  827. Range of degrees to select from.If degrees is a number instead of sequence like (min, max), the range of degrees
  828. will be (-degrees, +degrees).
  829. interpolation:
  830. Points outside the boundaries of the input are filled according to the given mode
  831. (one of {'nearest', 'bilinear'}).
  832. expand:
  833. Optional expansion flag.
  834. If true, expands the output to make it large enough to hold the entire rotated image.
  835. If false or omitted, make the output image the same size as the input image.
  836. Note that the expand flag assumes rotation around the center and no translation.
  837. center:
  838. Optional center of rotation, (x, y). Origin is the upper left corner.
  839. Default is the center of the image.
  840. fill:
  841. Pixel fill value for the area outside the rotated image.
  842. Default is ``0``. If given a number, the value is used for all bands respectively.
  843. Returns:
  844. Rotated image tensor.
  845. -------
  846. '''
  847. if isinstance(image, (tf.Tensor, np.ndarray)) and len(image.shape) == 3:
  848. image = np.asarray(image)
  849. else:
  850. 'Image should be a 3d tensor or np.ndarray.'
  851. h, w, c = image.shape[0], image.shape[1], image.shape[2]
  852. if isinstance(degrees, numbers.Number):
  853. if degrees < 0:
  854. raise ValueError('If degrees is a single number, it must be positive.' 'But got {}'.format(degrees))
  855. degrees = (-degrees, degrees)
  856. elif not (isinstance(degrees, (list, tuple)) and len(degrees) == 2):
  857. raise ValueError('If degrees is a list/tuple, it must be length of 2.' 'But got {}'.format(degrees))
  858. else:
  859. if degrees[0] > degrees[1]:
  860. raise ValueError('if degrees is a list/tuple, it should be (min, max).')
  861. if isinstance(fill, numbers.Number):
  862. fill = (fill, ) * c
  863. elif not (isinstance(fill, (list, tuple)) and len(fill) == c):
  864. raise ValueError(
  865. 'If fill should be a single number or a list/tuple with length of image channels.'
  866. 'But got {}'.format(fill)
  867. )
  868. if interpolation not in ('nearest', 'bilinear'):
  869. raise ValueError('Interpolation only support {\'nearest\', \'bilinear\'} .')
  870. orig_dtype = image.dtype
  871. image = np.asarray(image, dtype=np.float)
  872. theta = np.random.uniform(degrees[0], degrees[1])
  873. angle = -math.radians(theta)
  874. rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
  875. if center is None:
  876. rotn_center = (w / 2.0, h / 2.0)
  877. else:
  878. rotn_center = center
  879. matrix = [
  880. round(math.cos(angle), 15),
  881. round(math.sin(angle), 15),
  882. 0.0,
  883. round(-math.sin(angle), 15),
  884. round(math.cos(angle), 15),
  885. 0.0,
  886. ]
  887. def transform(x, y, matrix):
  888. (a, b, c, d, e, f) = matrix
  889. return a * x + b * y + c, d * x + e * y + f
  890. matrix[2], matrix[5] = transform(-rotn_center[0] - 0, -rotn_center[1] - 0, matrix)
  891. matrix[2] += rotn_center[0]
  892. matrix[5] += rotn_center[1]
  893. if expand:
  894. # calculate output size
  895. xx = []
  896. yy = []
  897. for x, y in ((0, 0), (w, 0), (w, h), (0, h)):
  898. x, y = transform(x, y, matrix)
  899. xx.append(x)
  900. yy.append(y)
  901. nw = math.ceil(max(xx)) - math.floor(min(xx))
  902. nh = math.ceil(max(yy)) - math.floor(min(yy))
  903. matrix[2], matrix[5] = transform(-(nw - w) / 2.0, -(nh - h) / 2.0, matrix)
  904. w, h = nw, nh
  905. image = np.rollaxis(image, 2, 0)
  906. dummy = np.ones((1, image.shape[1], image.shape[2]), dtype=image.dtype)
  907. image = np.concatenate((image, dummy), axis=0)
  908. final_offset = np.array([matrix[5], matrix[2]])
  909. channel_images = [
  910. ndimage.interpolation.affine_transform(
  911. x_channel, rotation_matrix, final_offset, output_shape=(h, w), order=3, mode='constant', cval=0
  912. ) for x_channel in image
  913. ]
  914. image = np.stack(channel_images, axis=0)
  915. image = np.rollaxis(image, 0, 3)
  916. mask = image[:, :, -1:]
  917. image = image[:, :, :-1]
  918. mask = np.tile(mask, (1, 1, image.shape[2]))
  919. fill = np.tile(fill, (image.shape[0], image.shape[1], 1))
  920. if interpolation == 'nearest':
  921. mask = mask < 0.5
  922. image[mask] = fill[mask]
  923. else:
  924. image = image * mask + (1.0 - mask) * fill
  925. image = np.asarray(image, dtype=orig_dtype)
  926. image = ops.convert_to_tensor(image)
  927. return image
  928. def transform_matrix_offset_center(matrix, x, y):
  929. o_x = float(x) / 2 + 0.5
  930. o_y = float(y) / 2 + 0.5
  931. offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
  932. reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
  933. transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
  934. return transform_matrix
  935. def random_shear(image, degrees, interpolation, fill):
  936. if isinstance(image, (tf.Tensor, np.ndarray)) and len(image.shape) == 3:
  937. image = np.asarray(image)
  938. else:
  939. 'Image should be a 3d tensor or np.ndarray.'
  940. h, w, c = image.shape[0], image.shape[1], image.shape[2]
  941. if interpolation not in ('nearest', 'bilinear'):
  942. raise ValueError('Interpolation only support {\'nearest\', \'bilinear\'} .')
  943. if isinstance(degrees, numbers.Number):
  944. degrees = (-degrees, degrees, 0, 0)
  945. elif isinstance(degrees, (list, tuple)) and (len(degrees) == 2 or len(degrees) == 4):
  946. if len(degrees) == 2:
  947. degrees = (degrees[0], degrees[1], 0, 0)
  948. else:
  949. raise ValueError(
  950. 'degrees should be a single number or a list/tuple with length in (2 ,4).'
  951. 'But got {}'.format(degrees)
  952. )
  953. if isinstance(fill, numbers.Number):
  954. fill = (fill, ) * c
  955. elif not (isinstance(fill, (list, tuple)) and len(fill) == c):
  956. raise ValueError(
  957. 'If fill should be a single number or a list/tuple with length of image channels.'
  958. 'But got {}'.format(fill)
  959. )
  960. orig_dtype = image.dtype
  961. image = np.asarray(image, dtype=np.float)
  962. shear = [np.random.uniform(degrees[0], degrees[1]), np.random.uniform(degrees[2], degrees[3])]
  963. shear = np.deg2rad(shear)
  964. shear_matrix = np.array(
  965. [[math.cos(shear[1]), math.sin(shear[1]), 0], [math.sin(shear[0]), math.cos(shear[0]), 0], [0, 0, 1]]
  966. )
  967. transform_matrix = shear_matrix
  968. transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
  969. shear_matrix = transform_matrix[:2, :2]
  970. offset = transform_matrix[:2, 2]
  971. image = np.rollaxis(image, 2, 0)
  972. dummy = np.ones((1, image.shape[1], image.shape[2]), dtype=image.dtype)
  973. image = np.concatenate((image, dummy), axis=0)
  974. channel_images = [
  975. ndimage.interpolation.affine_transform(x_channel, shear_matrix, offset, order=3, mode='constant', cval=0)
  976. for x_channel in image
  977. ]
  978. image = np.stack(channel_images, axis=0)
  979. image = np.rollaxis(image, 0, 3)
  980. mask = image[:, :, -1:]
  981. image = image[:, :, :-1]
  982. mask = np.tile(mask, (1, 1, c))
  983. fill = np.tile(fill, (h, w, 1))
  984. if interpolation == 'nearest':
  985. mask = mask < 0.5
  986. image[mask] = fill[mask]
  987. else:
  988. image = image * mask + (1.0 - mask) * fill
  989. image = np.asarray(image, dtype=orig_dtype)
  990. image = ops.convert_to_tensor(image)
  991. return image
  992. def random_shift(image, shift, interpolation, fill):
  993. if isinstance(image, (tf.Tensor, np.ndarray)) and len(image.shape) == 3:
  994. image = np.asarray(image)
  995. else:
  996. 'Image should be a 3d tensor or np.ndarray.'
  997. h, w, c = image.shape[0], image.shape[1], image.shape[2]
  998. if interpolation not in ('nearest', 'bilinear'):
  999. raise ValueError('Interpolation only support {\'nearest\', \'bilinear\'} .')
  1000. if not (isinstance(shift, (tuple, list)) and len(shift) == 2):
  1001. raise ValueError('Shift should be a list/tuple with length of 2.' 'But got {}'.format(shift))
  1002. if isinstance(fill, numbers.Number):
  1003. fill = (fill, ) * c
  1004. elif not (isinstance(fill, (list, tuple)) and len(fill) == c):
  1005. raise ValueError(
  1006. 'If fill should be a single number or a list/tuple with length of image channels.'
  1007. 'But got {}'.format(fill)
  1008. )
  1009. orig_dtype = image.dtype
  1010. image = np.asarray(image, dtype=np.float)
  1011. hrg = shift[0]
  1012. wrg = shift[1]
  1013. tx = -np.random.uniform(-hrg, hrg) * w
  1014. ty = -np.random.uniform(-wrg, wrg) * h
  1015. shift_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
  1016. transform_matrix = transform_matrix_offset_center(shift_matrix, h, w)
  1017. shift_matrix = transform_matrix[:2, :2]
  1018. offset = transform_matrix[:2, 2]
  1019. image = np.rollaxis(image, 2, 0)
  1020. dummy = np.ones((1, image.shape[1], image.shape[2]), dtype=image.dtype)
  1021. image = np.concatenate((image, dummy), axis=0)
  1022. channel_images = [
  1023. ndimage.interpolation.affine_transform(x_channel, shift_matrix, offset, order=3, mode='constant', cval=0)
  1024. for x_channel in image
  1025. ]
  1026. image = np.stack(channel_images, axis=0)
  1027. image = np.rollaxis(image, 0, 3)
  1028. mask = image[:, :, -1:]
  1029. image = image[:, :, :-1]
  1030. mask = np.tile(mask, (1, 1, c))
  1031. fill = np.tile(fill, (h, w, 1))
  1032. if interpolation == 'nearest':
  1033. mask = mask < 0.5
  1034. image[mask] = fill[mask]
  1035. else:
  1036. image = image * mask + (1.0 - mask) * fill
  1037. image = np.asarray(image, dtype=orig_dtype)
  1038. image = ops.convert_to_tensor(image)
  1039. return image
  1040. def random_zoom(image, zoom, interpolation, fill):
  1041. if isinstance(image, (tf.Tensor, np.ndarray)) and len(image.shape) == 3:
  1042. image = np.asarray(image)
  1043. else:
  1044. 'Image should be a 3d tensor or np.ndarray.'
  1045. h, w, c = image.shape[0], image.shape[1], image.shape[2]
  1046. if interpolation not in ('nearest', 'bilinear'):
  1047. raise ValueError('Interpolation only support {\'nearest\', \'bilinear\'} .')
  1048. if not (isinstance(zoom, (tuple, list)) and len(zoom) == 2):
  1049. raise ValueError('Zoom should be a list/tuple with length of 2.' 'But got {}'.format(zoom))
  1050. if not (0 <= zoom[0] <= zoom[1]):
  1051. raise ValueError('Zoom values should be positive, and zoom[1] should be greater than zoom[0].')
  1052. if isinstance(fill, numbers.Number):
  1053. fill = (fill, ) * c
  1054. elif not (isinstance(fill, (list, tuple)) and len(fill) == c):
  1055. raise ValueError(
  1056. 'If fill should be a single number or a list/tuple with length of image channels.'
  1057. 'But got {}'.format(fill)
  1058. )
  1059. orig_dtype = image.dtype
  1060. image = np.asarray(image, dtype=np.float)
  1061. zoom_factor = 1 / np.random.uniform(zoom[0], zoom[1])
  1062. zoom_matrix = np.array([[zoom_factor, 0, 0], [0, zoom_factor, 0], [0, 0, 1]])
  1063. transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
  1064. zoom_matrix = transform_matrix[:2, :2]
  1065. offset = transform_matrix[:2, 2]
  1066. image = np.rollaxis(image, 2, 0)
  1067. dummy = np.ones((1, image.shape[1], image.shape[2]), dtype=image.dtype)
  1068. image = np.concatenate((image, dummy), axis=0)
  1069. channel_images = [
  1070. ndimage.interpolation.affine_transform(x_channel, zoom_matrix, offset, order=3, mode='constant', cval=0)
  1071. for x_channel in image
  1072. ]
  1073. image = np.stack(channel_images, axis=0)
  1074. image = np.rollaxis(image, 0, 3)
  1075. mask = image[:, :, -1:]
  1076. image = image[:, :, :-1]
  1077. mask = np.tile(mask, (1, 1, c))
  1078. fill = np.tile(fill, (h, w, 1))
  1079. if interpolation == 'nearest':
  1080. mask = mask < 0.5
  1081. image[mask] = fill[mask]
  1082. else:
  1083. image = image * mask + (1.0 - mask) * fill
  1084. image = np.asarray(image, dtype=orig_dtype)
  1085. image = ops.convert_to_tensor(image)
  1086. return image
  1087. def random_affine(image, degrees, shift, zoom, shear, interpolation, fill):
  1088. if isinstance(image, (tf.Tensor, np.ndarray)) and len(image.shape) == 3:
  1089. image = np.asarray(image)
  1090. else:
  1091. 'Image should be a 3d tensor or np.ndarray.'
  1092. h, w, c = image.shape[0], image.shape[1], image.shape[2]
  1093. if isinstance(fill, numbers.Number):
  1094. fill = (fill, ) * c
  1095. elif not (isinstance(fill, (list, tuple)) and len(fill) == c):
  1096. raise ValueError(
  1097. 'If fill should be a single number or a list/tuple with length of image channels.'
  1098. 'But got {}'.format(fill)
  1099. )
  1100. orig_dtype = image.dtype
  1101. image = np.asarray(image, dtype=np.float)
  1102. theta = np.random.uniform(degrees[0], degrees[1])
  1103. theta = np.deg2rad(theta)
  1104. rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]])
  1105. transform_matrix = rotation_matrix
  1106. if shift is not None:
  1107. max_dx = float(shift[0] * w)
  1108. max_dy = float(shift[1] * h)
  1109. tx = -int(round(np.random.uniform(-max_dx, max_dx)))
  1110. ty = -int(round(np.random.uniform(-max_dy, max_dy)))
  1111. shift_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
  1112. transform_matrix = np.dot(transform_matrix, shift_matrix)
  1113. if shear is not None:
  1114. shear_x = shear_y = 0
  1115. shear_x = float(np.random.uniform(shear[0], shear[1]))
  1116. if len(shear) == 4:
  1117. shear_y = float(np.random.uniform(shear[2], shear[3]))
  1118. shear_x = np.deg2rad(shear_x)
  1119. shear_y = np.deg2rad(shear_y)
  1120. shear_matrix = np.array(
  1121. [[math.cos(shear_y), math.sin(shear_y), 0], [math.sin(shear_x), math.cos(shear_x), 0], [0, 0, 1]]
  1122. )
  1123. transform_matrix = np.dot(transform_matrix, shear_matrix)
  1124. if zoom is not None:
  1125. zoom = 1 / float(np.random.uniform(zoom[0], zoom[1]))
  1126. zoom_matrix = np.array([[zoom, 0, 0], [0, zoom, 0], [0, 0, 1]])
  1127. transform_matrix = np.dot(transform_matrix, zoom_matrix)
  1128. transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
  1129. image = np.rollaxis(image, 2, 0)
  1130. finale_affine_matrix = transform_matrix[:2, :2]
  1131. finale_offset = transform_matrix[:2, 2]
  1132. dummy = np.ones((1, h, w), dtype=image.dtype)
  1133. image = np.concatenate((image, dummy), axis=0)
  1134. channel_images = [
  1135. ndimage.interpolation.affine_transform(
  1136. x_channel, finale_affine_matrix, finale_offset, order=3, mode='constant', cval=0
  1137. ) for x_channel in image
  1138. ]
  1139. image = np.stack(channel_images, axis=0)
  1140. image = np.rollaxis(image, 0, 3)
  1141. mask = image[:, :, -1:]
  1142. image = image[:, :, :-1]
  1143. mask = np.tile(mask, (1, 1, c))
  1144. fill = np.tile(fill, (h, w, 1))
  1145. if interpolation == 'nearest':
  1146. mask = mask < 0.5
  1147. image[mask] = fill[mask]
  1148. else:
  1149. image = image * mask + (1.0 - mask) * fill
  1150. image = np.asarray(image, dtype=orig_dtype)
  1151. image = ops.convert_to_tensor(image)
  1152. return image

TensorLayer 3.0 是一款兼容多种深度学习框架作为计算后端的深度学习库,计划兼容 TensorFlow、PyTorch、MindSpore、Paddle。