You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

c_transforms.py 66 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493
  1. # Copyright 2019-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. The module vision.c_transforms is inherited from _c_dataengine
  17. and is implemented based on OpenCV in C++. It's a high performance module to
  18. process images. Users can apply suitable augmentations on image data
  19. to improve their training models.
  20. .. Note::
  21. A constructor's arguments for every class in this module must be saved into the
  22. class attributes (self.xxx) to support save() and load().
  23. Examples:
  24. >>> from mindspore.dataset.vision import Border, Inter
  25. >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
  26. >>> # create a dataset that reads all files in dataset_dir with 8 threads
  27. >>> image_folder_dataset = ds.ImageFolderDataset(image_folder_dataset_dir,
  28. ... num_parallel_workers=8)
  29. >>> # create a list of transformations to be applied to the image data
  30. >>> transforms_list = [c_vision.Decode(),
  31. ... c_vision.Resize((256, 256), interpolation=Inter.LINEAR),
  32. ... c_vision.RandomCrop(200, padding_mode=Border.EDGE),
  33. ... c_vision.RandomRotation((0, 15)),
  34. ... c_vision.Normalize((100, 115.0, 121.0), (71.0, 68.0, 70.0)),
  35. ... c_vision.HWC2CHW()]
  36. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  37. >>> # apply the transformation to the dataset through data1.map()
  38. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  39. ... input_columns="image")
  40. >>> image_folder_dataset = image_folder_dataset.map(operations=onehot_op,
  41. ... input_columns="label")
  42. """
  43. import numbers
  44. import numpy as np
  45. from PIL import Image
  46. import mindspore._c_dataengine as cde
  47. from .utils import Inter, Border, ImageBatchFormat
  48. from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \
  49. check_mix_up_batch_c, check_normalize_c, check_normalizepad_c, check_random_crop, check_random_color_adjust, \
  50. check_random_rotation, check_range, check_resize, check_rescale, check_pad, check_cutout, \
  51. check_uniform_augment_cpp, \
  52. check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \
  53. check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \
  54. check_cut_mix_batch_c, check_posterize
  55. from ..transforms.c_transforms import TensorOperation
  56. class ImageTensorOperation(TensorOperation):
  57. """
  58. Base class of Image Tensor Ops
  59. """
  60. def __call__(self, input_tensor):
  61. if not isinstance(input_tensor, list):
  62. input_list = [input_tensor]
  63. else:
  64. input_list = input_tensor
  65. tensor_list = []
  66. for tensor in input_list:
  67. if not isinstance(tensor, (np.ndarray, Image.Image)):
  68. raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(tensor)))
  69. tensor_list.append(cde.Tensor(np.asarray(tensor)))
  70. callable_op = cde.Execute(self.parse())
  71. output_list = callable_op(tensor_list)
  72. for i, element in enumerate(output_list):
  73. arr = element.as_array()
  74. if arr.dtype.char == 'S':
  75. output_list[i] = np.char.decode(arr)
  76. else:
  77. output_list[i] = arr
  78. if not isinstance(input_tensor, list) and len(output_list) == 1:
  79. output_list = output_list[0]
  80. return output_list
  81. def parse(self):
  82. raise NotImplementedError("ImageTensorOperation has to implement parse() method.")
# Lookup tables translating the Python-side enums (Border, ImageBatchFormat, Inter)
# into the corresponding enum values exposed by the C++ data engine (cde).

# Border (padding mode) -> cde.BorderType
DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

# ImageBatchFormat -> cde.ImageBatchFormat
DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                           ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

# Inter (interpolation mode) -> cde.InterpolationMode
DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                   Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                   Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
                   Inter.AREA: cde.InterpolationMode.DE_INTER_AREA}
  93. def parse_padding(padding):
  94. if isinstance(padding, numbers.Number):
  95. padding = [padding] * 4
  96. if len(padding) == 2:
  97. left = top = padding[0]
  98. right = bottom = padding[1]
  99. padding = (left, top, right, bottom,)
  100. if isinstance(padding, list):
  101. padding = tuple(padding)
  102. return padding
  103. class AutoContrast(ImageTensorOperation):
  104. """
  105. Apply automatic contrast on input image.
  106. Args:
  107. cutoff (float, optional): Percent of pixels to cut off from the histogram (default=0.0).
  108. ignore (Union[int, sequence], optional): Pixel values to ignore (default=None).
  109. Examples:
  110. >>> transforms_list = [c_vision.Decode(), c_vision.AutoContrast(cutoff=10.0, ignore=[10, 20])]
  111. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  112. ... input_columns=["image"])
  113. """
  114. @check_auto_contrast
  115. def __init__(self, cutoff=0.0, ignore=None):
  116. if ignore is None:
  117. ignore = []
  118. if isinstance(ignore, int):
  119. ignore = [ignore]
  120. self.cutoff = cutoff
  121. self.ignore = ignore
  122. def parse(self):
  123. return cde.AutoContrastOperation(self.cutoff, self.ignore)
  124. class BoundingBoxAugment(ImageTensorOperation):
  125. """
  126. Apply a given image transform on a random selection of bounding box regions of a given image.
  127. Args:
  128. transform: C++ transformation function to be applied on random selection
  129. of bounding box regions of a given image.
  130. ratio (float, optional): Ratio of bounding boxes to apply augmentation on.
  131. Range: [0, 1] (default=0.3).
  132. Examples:
  133. >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes
  134. >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)
  135. >>> # map to apply ops
  136. >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op],
  137. ... input_columns=["image", "bbox"],
  138. ... output_columns=["image", "bbox"],
  139. ... column_order=["image", "bbox"])
  140. """
  141. @check_bounding_box_augment_cpp
  142. def __init__(self, transform, ratio=0.3):
  143. self.ratio = ratio
  144. self.transform = transform
  145. def parse(self):
  146. if self.transform and getattr(self.transform, 'parse', None):
  147. transform = self.transform.parse()
  148. else:
  149. transform = self.transform
  150. return cde.BoundingBoxAugmentOperation(transform, self.ratio)
  151. class CenterCrop(ImageTensorOperation):
  152. """
  153. Crops the input image at the center to the given size.
  154. Args:
  155. size (Union[int, sequence]): The output size of the cropped image.
  156. If size is an integer, a square crop of size (size, size) is returned.
  157. If size is a sequence of length 2, it should be (height, width).
  158. Examples:
  159. >>> # crop image to a square
  160. >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)]
  161. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
  162. ... input_columns=["image"])
  163. >>> # crop image to portrait style
  164. >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))]
  165. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
  166. ... input_columns=["image"])
  167. """
  168. @check_crop
  169. def __init__(self, size):
  170. if isinstance(size, int):
  171. size = (size, size)
  172. self.size = size
  173. def parse(self):
  174. return cde.CenterCropOperation(self.size)
  175. class CutMixBatch(ImageTensorOperation):
  176. """
  177. Apply CutMix transformation on input batch of images and labels.
  178. Note that you need to make labels into one-hot format and batch before calling this function.
  179. Args:
  180. image_batch_format (Image Batch Format): The method of padding. Can be any of
  181. [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW]
  182. alpha (float, optional): hyperparameter of beta distribution (default = 1.0).
  183. prob (float, optional): The probability by which CutMix is applied to each image (default = 1.0).
  184. Examples:
  185. >>> from mindspore.dataset.vision import ImageBatchFormat
  186. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  187. >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op,
  188. ... input_columns=["label"])
  189. >>> cutmix_batch_op = c_vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5)
  190. >>> image_folder_dataset = image_folder_dataset.batch(5)
  191. >>> image_folder_dataset = image_folder_dataset.map(operations=cutmix_batch_op,
  192. ... input_columns=["image", "label"])
  193. """
  194. @check_cut_mix_batch_c
  195. def __init__(self, image_batch_format, alpha=1.0, prob=1.0):
  196. self.image_batch_format = image_batch_format.value
  197. self.alpha = alpha
  198. self.prob = prob
  199. def parse(self):
  200. return cde.CutMixBatchOperation(DE_C_IMAGE_BATCH_FORMAT[self.image_batch_format], self.alpha, self.prob)
  201. class CutOut(ImageTensorOperation):
  202. """
  203. Randomly cut (mask) out a given number of square patches from the input NumPy image array.
  204. Args:
  205. length (int): The side length of each square patch.
  206. num_patches (int, optional): Number of patches to be cut out of an image (default=1).
  207. Examples:
  208. >>> transforms_list = [c_vision.Decode(), c_vision.CutOut(80, num_patches=10)]
  209. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  210. ... input_columns=["image"])
  211. """
  212. @check_cutout
  213. def __init__(self, length, num_patches=1):
  214. self.length = length
  215. self.num_patches = num_patches
  216. def parse(self):
  217. return cde.CutOutOperation(self.length, self.num_patches)
  218. class Decode(ImageTensorOperation):
  219. """
  220. Decode the input image in RGB mode.
  221. Args:
  222. rgb (bool, optional): Mode of decoding input image (default=True).
  223. If True means format of decoded image is RGB else BGR(deprecated).
  224. Examples:
  225. >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip()]
  226. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  227. ... input_columns=["image"])
  228. """
  229. def __init__(self, rgb=True):
  230. self.rgb = rgb
  231. def __call__(self, img):
  232. """
  233. Call method.
  234. Args:
  235. img (NumPy): Image to be decoded.
  236. Returns:
  237. img (NumPy), Decoded image.
  238. """
  239. if not isinstance(img, np.ndarray) or img.ndim != 1 or img.dtype.type is np.str_:
  240. raise TypeError("Input should be an encoded image with 1-D NumPy type, got {}.".format(type(img)))
  241. decode = cde.Execute(cde.DecodeOperation(self.rgb))
  242. img = decode(cde.Tensor(np.asarray(img)))
  243. return img.as_array()
  244. def parse(self):
  245. return cde.DecodeOperation(self.rgb)
  246. class Equalize(ImageTensorOperation):
  247. """
  248. Apply histogram equalization on input image.
  249. Examples:
  250. >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()]
  251. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  252. ... input_columns=["image"])
  253. """
  254. def parse(self):
  255. return cde.EqualizeOperation()
  256. class HWC2CHW(ImageTensorOperation):
  257. """
  258. Transpose the input image; shape (H, W, C) to shape (C, H, W).
  259. Examples:
  260. >>> transforms_list = [c_vision.Decode(),
  261. ... c_vision.RandomHorizontalFlip(0.75),
  262. ... c_vision.RandomCrop(512),
  263. ... c_vision.HWC2CHW()]
  264. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  265. ... input_columns=["image"])
  266. """
  267. def parse(self):
  268. return cde.HwcToChwOperation()
  269. class Invert(ImageTensorOperation):
  270. """
  271. Apply invert on input image in RGB mode.
  272. Examples:
  273. >>> transforms_list = [c_vision.Decode(), c_vision.Invert()]
  274. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  275. ... input_columns=["image"])
  276. """
  277. def parse(self):
  278. return cde.InvertOperation()
  279. class MixUpBatch(ImageTensorOperation):
  280. """
  281. Apply MixUp transformation on input batch of images and labels. Each image is multiplied by a random weight (lambda)
  282. and then added to a randomly selected image from the batch multiplied by (1 - lambda). The same formula is also
  283. applied to the one-hot labels.
  284. Note that you need to make labels into one-hot format and batch before calling this function.
  285. Args:
  286. alpha (float, optional): Hyperparameter of beta distribution (default = 1.0).
  287. Examples:
  288. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  289. >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op,
  290. ... input_columns=["label"])
  291. >>> mixup_batch_op = c_vision.MixUpBatch(alpha=0.9)
  292. >>> image_folder_dataset = image_folder_dataset.batch(5)
  293. >>> image_folder_dataset = image_folder_dataset.map(operations=mixup_batch_op,
  294. ... input_columns=["image", "label"])
  295. """
  296. @check_mix_up_batch_c
  297. def __init__(self, alpha=1.0):
  298. self.alpha = alpha
  299. def parse(self):
  300. return cde.MixUpBatchOperation(self.alpha)
  301. class Normalize(ImageTensorOperation):
  302. """
  303. Normalize the input image with respect to mean and standard deviation.
  304. Args:
  305. mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
  306. The mean values must be in range [0.0, 255.0].
  307. std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
  308. The standard deviation values must be in range (0.0, 255.0].
  309. Examples:
  310. >>> decode_op = c_vision.Decode()
  311. >>> normalize_op = c_vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0])
  312. >>> transforms_list = [decode_op, normalize_op]
  313. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  314. ... input_columns=["image"])
  315. """
  316. @check_normalize_c
  317. def __init__(self, mean, std):
  318. if len(mean) == 1:
  319. mean = [mean[0]] * 3
  320. if len(std) == 1:
  321. std = [std[0]] * 3
  322. self.mean = mean
  323. self.std = std
  324. def parse(self):
  325. return cde.NormalizeOperation(self.mean, self.std)
  326. class NormalizePad(ImageTensorOperation):
  327. """
  328. Normalize the input image with respect to mean and standard deviation then pad an extra channel with value zero.
  329. Args:
  330. mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
  331. The mean values must be in range (0.0, 255.0].
  332. std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
  333. The standard deviation values must be in range (0.0, 255.0].
  334. dtype (str): Set the output data type of normalized image (default is "float32").
  335. Examples:
  336. >>> decode_op = c_vision.Decode()
  337. >>> normalize_pad_op = c_vision.NormalizePad(mean=[121.0, 115.0, 100.0],
  338. ... std=[70.0, 68.0, 71.0],
  339. ... dtype="float32")
  340. >>> transforms_list = [decode_op, normalize_pad_op]
  341. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  342. ... input_columns=["image"])
  343. """
  344. @check_normalizepad_c
  345. def __init__(self, mean, std, dtype="float32"):
  346. self.mean = mean
  347. self.std = std
  348. self.dtype = dtype
  349. def parse(self):
  350. return cde.NormalizePadOperation(self.mean, self.std, self.dtype)
  351. class Pad(ImageTensorOperation):
  352. """
  353. Pads the image according to padding parameters.
  354. Args:
  355. padding (Union[int, sequence]): The number of pixels to pad the image.
  356. If a single number is provided, it pads all borders with this value.
  357. If a tuple or list of 2 values are provided, it pads the (left and top)
  358. with the first value and (right and bottom) with the second value.
  359. If 4 values are provided as a list or tuple,
  360. it pads the left, top, right and bottom respectively.
  361. fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
  362. padding_mode Border.CONSTANT (default=0).
  363. If it is an integer, it is used for all RGB channels.
  364. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  365. The fill_value values must be in range [0, 255].
  366. padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of
  367. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].
  368. - Border.CONSTANT, means it fills the border with constant values.
  369. - Border.EDGE, means it pads with the last value on the edge.
  370. - Border.REFLECT, means it reflects the values on the edge omitting the last
  371. value of edge.
  372. - Border.SYMMETRIC, means it reflects the values on the edge repeating the last
  373. value of edge.
  374. Examples:
  375. >>> from mindspore.dataset.vision import Border
  376. >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])]
  377. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  378. ... input_columns=["image"])
  379. """
  380. @check_pad
  381. def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT):
  382. padding = parse_padding(padding)
  383. if isinstance(fill_value, int):
  384. fill_value = tuple([fill_value] * 3)
  385. self.padding = padding
  386. self.fill_value = fill_value
  387. self.padding_mode = padding_mode
  388. def parse(self):
  389. return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode])
  390. class RandomAffine(ImageTensorOperation):
  391. """
  392. Apply Random affine transformation to the input image.
  393. Args:
  394. degrees (int or float or sequence): Range of the rotation degrees.
  395. If degrees is a number, the range will be (-degrees, degrees).
  396. If degrees is a sequence, it should be (min, max).
  397. translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in
  398. x(horizontal) and y(vertical) directions (default=None).
  399. The horizontal and vertical shift is selected randomly from the range:
  400. (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively.
  401. If a tuple or list of size 2, then a translate parallel to the X axis in the range of
  402. (translate[0], translate[1]) is applied.
  403. If a tuple of list of size 4, then a translate parallel to the X axis in the range of
  404. (translate[0], translate[1]) and a translate parallel to the Y axis in the range of
  405. (translate[2], translate[3]) are applied.
  406. If None, no translation is applied.
  407. scale (sequence, optional): Scaling factor interval (default=None, original scale is used).
  408. shear (int or float or sequence, optional): Range of shear factor (default=None).
  409. If a number, then a shear parallel to the X axis in the range of (-shear, +shear) is applied.
  410. If a tuple or list of size 2, then a shear parallel to the X axis in the range of (shear[0], shear[1])
  411. is applied.
  412. If a tuple of list of size 4, then a shear parallel to X axis in the range of (shear[0], shear[1])
  413. and a shear parallel to Y axis in the range of (shear[2], shear[3]) is applied.
  414. If None, no shear is applied.
  415. resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
  416. If omitted, or if the image has mode "1" or "P", it is set to be Inter.NEAREST.
  417. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
  418. - Inter.BILINEAR, means resample method is bilinear interpolation.
  419. - Inter.NEAREST, means resample method is nearest-neighbor interpolation.
  420. - Inter.BICUBIC, means resample method is bicubic interpolation.
  421. fill_value (tuple or int, optional): Optional fill_value to fill the area outside the transform
  422. in the output image. There must be three elements in tuple and the value of single element is [0, 255].
  423. Used only in Pillow versions > 5.0.0 (default=0, filling is performed).
  424. Raises:
  425. ValueError: If degrees is negative.
  426. ValueError: If translation value is not between -1 and 1.
  427. ValueError: If scale is not positive.
  428. ValueError: If shear is a number but is not positive.
  429. TypeError: If degrees is not a number or a list or a tuple.
  430. If degrees is a list or tuple, its length is not 2.
  431. TypeError: If translate is specified but is not list or a tuple of length 2 or 4.
  432. TypeError: If scale is not a list or tuple of length 2.''
  433. TypeError: If shear is not a list or tuple of length 2 or 4.
  434. TypeError: If fill_value is not a single integer or a 3-tuple.
  435. Examples:
  436. >>> from mindspore.dataset.vision import Inter
  437. >>> decode_op = c_vision.Decode()
  438. >>> random_affine_op = c_vision.RandomAffine(degrees=15,
  439. ... translate=(-0.1, 0.1, 0, 0),
  440. ... scale=(0.9, 1.1),
  441. ... resample=Inter.NEAREST)
  442. >>> transforms_list = [decode_op, random_affine_op]
  443. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  444. ... input_columns=["image"])
  445. """
  446. @check_random_affine
  447. def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0):
  448. # Parameter checking
  449. if shear is not None:
  450. if isinstance(shear, numbers.Number):
  451. shear = (-1 * shear, shear, 0., 0.)
  452. else:
  453. if len(shear) == 2:
  454. shear = [shear[0], shear[1], 0., 0.]
  455. elif len(shear) == 4:
  456. shear = [s for s in shear]
  457. if isinstance(degrees, numbers.Number):
  458. degrees = (-1 * degrees, degrees)
  459. if isinstance(fill_value, numbers.Number):
  460. fill_value = (fill_value, fill_value, fill_value)
  461. # translation
  462. if translate is None:
  463. translate = (0.0, 0.0, 0.0, 0.0)
  464. # scale
  465. if scale is None:
  466. scale = (1.0, 1.0)
  467. # shear
  468. if shear is None:
  469. shear = (0.0, 0.0, 0.0, 0.0)
  470. self.degrees = degrees
  471. self.translate = translate
  472. self.scale_ = scale
  473. self.shear = shear
  474. self.resample = DE_C_INTER_MODE[resample]
  475. self.fill_value = fill_value
  476. def parse(self):
  477. return cde.RandomAffineOperation(self.degrees, self.translate, self.scale_, self.shear, self.resample,
  478. self.fill_value)
  479. class RandomColor(ImageTensorOperation):
  480. """
  481. Adjust the color of the input image by a fixed or random degree.
  482. This operation works only with 3-channel color images.
  483. Args:
  484. degrees (sequence, optional): Range of random color adjustment degrees.
  485. It should be in (min, max) format. If min=max, then it is a
  486. single fixed magnitude operation (default=(0.1, 1.9)).
  487. Examples:
  488. >>> transforms_list = [c_vision.Decode(), c_vision.RandomColor((0.5, 2.0))]
  489. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  490. ... input_columns=["image"])
  491. """
  492. @check_positive_degrees
  493. def __init__(self, degrees=(0.1, 1.9)):
  494. self.degrees = degrees
  495. def parse(self):
  496. return cde.RandomColorOperation(*self.degrees)
  497. class RandomColorAdjust(ImageTensorOperation):
  498. """
  499. Randomly adjust the brightness, contrast, saturation, and hue of the input image.
  500. Args:
  501. brightness (Union[float, list, tuple], optional): Brightness adjustment factor (default=(1, 1)).
  502. Cannot be negative.
  503. If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness].
  504. If it is a sequence, it should be [min, max] for the range.
  505. contrast (Union[float, list, tuple], optional): Contrast adjustment factor (default=(1, 1)).
  506. Cannot be negative.
  507. If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast].
  508. If it is a sequence, it should be [min, max] for the range.
  509. saturation (Union[float, list, tuple], optional): Saturation adjustment factor (default=(1, 1)).
  510. Cannot be negative.
  511. If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation].
  512. If it is a sequence, it should be [min, max] for the range.
  513. hue (Union[float, list, tuple], optional): Hue adjustment factor (default=(0, 0)).
  514. If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5.
  515. If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5.
  516. Examples:
  517. >>> decode_op = c_vision.Decode()
  518. >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1),
  519. ... contrast=(0.4, 1),
  520. ... saturation=(0.3, 1))
  521. >>> transforms_list = [decode_op, transform_op]
  522. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  523. ... input_columns=["image"])
  524. """
  @check_random_color_adjust
  def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)):
      """
      Turn each adjustment factor into a (min, max) pair and keep it on the instance.

      Brightness, contrast and saturation go through `expand_values` with its defaults;
      hue is expanded around 0 with bound (-0.5, 0.5) and without the non-negative clamp.
      """
      # Build all four ranges first (in argument order) and only then publish them as attributes.
      expanded = (
          self.expand_values(brightness),
          self.expand_values(contrast),
          self.expand_values(saturation),
          self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False),
      )
      self.brightness, self.contrast, self.saturation, self.hue = expanded
  def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True):
      """
      Convert an adjustment factor into a (min, max) tuple.

      Args:
          value (Union[number, sequence]): A single number, or a sequence whose first two
              items are used as (min, max).
          center (number, optional): Midpoint used when `value` is a single number (default=1).
          bound (sequence, optional): Range handed to `check_range` for the expanded
              pair (default=(0, FLOAT_MAX_INTEGER)).
          non_negative (bool, optional): Clamp the lower end of the expanded pair to 0 (default=True).

      Returns:
          tuple, the (min, max) pair.
      """
      # Sequences are passed through as a 2-tuple of their first two items.
      if not isinstance(value, numbers.Number):
          return (value[0], value[1])

      lower = center - value
      if non_negative:
          lower = max(0, lower)
      expanded = [lower, center + value]
      # NOTE(review): the source indentation was lost; the range check is assumed to apply
      # only to a pair expanded from a single number - confirm against the upstream file.
      check_range(expanded, bound)
      return (expanded[0], expanded[1])
  def parse(self):
      """
      Return the cde.RandomColorAdjustOperation built from the stored ranges.
      """
      color_ranges = (self.brightness, self.contrast, self.saturation, self.hue)
      return cde.RandomColorAdjustOperation(*color_ranges)
  class RandomCrop(ImageTensorOperation):
      """
      Crop a region of the input image at a random position.

      Args:
          size (Union[int, sequence]): Output size of the cropped image.
              An integer produces a square crop of size (size, size).
              A sequence of length 2 is read as (height, width).
          padding (Union[int, sequence], optional): Number of pixels to pad the image with (default=None).
              When padding is not None, the image is first padded with the padding values.
              A single number pads all borders with that value.
              A tuple or list of 2 values pads the (left and top) with the first value
              and the (right and bottom) with the second value.
              A tuple or list of 4 values pads the left, top, right and bottom respectively.
          pad_if_needed (bool, optional): Pad the image if either side is smaller than
              the given output size (default=False).
          fill_value (Union[int, tuple], optional): Pixel intensity of the borders when
              padding_mode is Border.CONSTANT (default=0). A 3-tuple fills the
              R, G, B channels respectively.
          padding_mode (Border mode, optional): Padding method (default=Border.CONSTANT). One of
              [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].

              - Border.CONSTANT, fills the border with constant values.

              - Border.EDGE, pads with the last value on the edge.

              - Border.REFLECT, reflects the values on the edge, omitting the last
                value of the edge.

              - Border.SYMMETRIC, reflects the values on the edge, repeating the last
                value of the edge.

      Examples:
          >>> from mindspore.dataset.vision import Border
          >>> decode_op = c_vision.Decode()
          >>> random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=Border.EDGE)
          >>> transforms_list = [decode_op, random_crop_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_crop
      def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
          # An integer size means a square crop.
          crop_size = (size, size) if isinstance(size, int) else size
          # No padding is stored as zero padding on all four borders.
          borders = (0, 0, 0, 0) if padding is None else parse_padding(padding)
          # An integer fill value is replicated into a 3-tuple.
          fill = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value

          self.size = crop_size
          self.padding = borders
          self.pad_if_needed = pad_if_needed
          self.fill_value = fill
          # Only the value of the border mode is kept; parse() uses it as the lookup key.
          self.padding_mode = padding_mode.value

      def parse(self):
          """
          Return the cde.RandomCropOperation configured from this instance.
          """
          cpp_border = DE_C_BORDER_TYPE[self.padding_mode]
          return cde.RandomCropOperation(
              self.size, self.padding, self.pad_if_needed, self.fill_value, cpp_border)
  class RandomCropDecodeResize(ImageTensorOperation):
      """
      Combined `Crop`, `Decode` and `Resize` operation. It gives better performance for JPEG images.

      Args:
          size (Union[int, sequence]): Size of the output image.
              An integer produces a square crop of size (size, size).
              A sequence of length 2 is read as (height, width).
          scale (tuple, optional): Range [min, max) of the respective size of the
              original size to be cropped (default=(0.08, 1.0)).
          ratio (tuple, optional): Range [min, max) of the aspect ratio to be
              cropped (default=(3. / 4., 4. / 3.)).
          interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
              One of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

              - Inter.BILINEAR, bilinear interpolation.

              - Inter.NEAREST, nearest-neighbor interpolation.

              - Inter.BICUBIC, bicubic interpolation.

          max_attempts (int, optional): Maximum number of attempts to propose a valid crop_area (default=10).
              If exceeded, fall back to use center_crop instead.

      Examples:
          >>> from mindspore.dataset.vision import Inter
          >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75),
          ...                                                         scale=(0.25, 0.5),
          ...                                                         interpolation=Inter.NEAREST,
          ...                                                         max_attempts=5)
          >>> transforms_list = [resize_crop_decode_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_resize_crop
      def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                   interpolation=Inter.BILINEAR, max_attempts=10):
          # An integer size is stored as a square (size, size) pair.
          self.size = (size, size) if isinstance(size, int) else size
          self.scale = scale
          self.ratio = ratio
          self.interpolation = interpolation
          self.max_attempts = max_attempts

      def parse(self):
          """
          Return the cde.RandomCropDecodeResizeOperation configured from this instance.
          """
          return cde.RandomCropDecodeResizeOperation(
              self.size,
              self.scale,
              self.ratio,
              DE_C_INTER_MODE[self.interpolation],
              self.max_attempts,
          )
  class RandomCropWithBBox(ImageTensorOperation):
      """
      Crop a region of the input image at a random position and adjust bounding boxes accordingly.

      Args:
          size (Union[int, sequence]): Output size of the cropped image.
              An integer produces a square crop of size (size, size).
              A sequence of length 2 is read as (height, width).
          padding (Union[int, sequence], optional): Number of pixels to pad the image with (default=None).
              When padding is not None, the image is first padded with the padding values.
              A single number pads all borders with that value.
              A tuple or list of 2 values pads the (left and top) with the first value
              and the (right and bottom) with the second value.
              A tuple or list of 4 values pads the left, top, right and bottom respectively.
          pad_if_needed (bool, optional): Pad the image if either side is smaller than
              the given output size (default=False).
          fill_value (Union[int, tuple], optional): Pixel intensity of the borders when
              padding_mode is Border.CONSTANT (default=0). A 3-tuple fills the
              R, G, B channels respectively.
          padding_mode (Border mode, optional): Padding method (default=Border.CONSTANT). One of
              [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].

              - Border.CONSTANT, fills the border with constant values.

              - Border.EDGE, pads with the last value on the edge.

              - Border.REFLECT, reflects the values on the edge, omitting the last
                value of the edge.

              - Border.SYMMETRIC, reflects the values on the edge, repeating the last
                value of the edge.

      Examples:
          >>> decode_op = c_vision.Decode()
          >>> random_crop_with_bbox_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200])
          >>> transforms_list = [decode_op, random_crop_with_bbox_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_crop
      def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
          # An integer size means a square crop.
          crop_size = (size, size) if isinstance(size, int) else size
          # No padding is stored as zero padding on all four borders.
          borders = (0, 0, 0, 0) if padding is None else parse_padding(padding)
          # An integer fill value is replicated into a 3-tuple.
          fill = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value

          self.size = crop_size
          self.padding = borders
          self.pad_if_needed = pad_if_needed
          self.fill_value = fill
          # Only the value of the border mode is kept; parse() uses it as the lookup key.
          self.padding_mode = padding_mode.value

      def parse(self):
          """
          Return the cde.RandomCropWithBBoxOperation configured from this instance.
          """
          cpp_border = DE_C_BORDER_TYPE[self.padding_mode]
          return cde.RandomCropWithBBoxOperation(
              self.size, self.padding, self.pad_if_needed, self.fill_value, cpp_border)
  class RandomHorizontalFlip(ImageTensorOperation):
      """
      Flip the input image horizontally, at random, with the given probability.

      Args:
          prob (float, optional): Probability that the image is flipped (default=0.5).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip(0.75)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_prob
      def __init__(self, prob=0.5):
          self.prob = prob

      def parse(self):
          """
          Return the cde.RandomHorizontalFlipOperation for the stored probability.
          """
          flip_probability = self.prob
          return cde.RandomHorizontalFlipOperation(flip_probability)
  class RandomHorizontalFlipWithBBox(ImageTensorOperation):
      """
      Flip the input image horizontally, at random, with the given probability, and adjust
      bounding boxes accordingly.

      Args:
          prob (float, optional): Probability that the image is flipped (default=0.5).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlipWithBBox(0.70)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_prob
      def __init__(self, prob=0.5):
          self.prob = prob

      def parse(self):
          """
          Return the cde.RandomHorizontalFlipWithBBoxOperation for the stored probability.
          """
          flip_probability = self.prob
          return cde.RandomHorizontalFlipWithBBoxOperation(flip_probability)
  class RandomPosterize(ImageTensorOperation):
      """
      Reduce the number of bits used for each color channel.

      Args:
          bits (sequence or int, optional): Range of random posterize to compress the image.
              Bits values must be in the range [1,8] and include at least one integer value
              in the given range. It must be given in (min, max) or integer format.
              If min=max, it is a single fixed magnitude operation (default=(8, 8)).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomPosterize((6, 8))]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_posterize
      def __init__(self, bits=(8, 8)):
          self.bits = bits

      def parse(self):
          """
          Return the cde.RandomPosterizeOperation; an integer `bits` is passed as (bits, bits).
          """
          bit_range = (self.bits, self.bits) if isinstance(self.bits, int) else self.bits
          return cde.RandomPosterizeOperation(bit_range)
  class RandomResizedCrop(ImageTensorOperation):
      """
      Crop the input image to a random size and aspect ratio.

      Args:
          size (Union[int, sequence]): Size of the output image.
              An integer produces a square crop of size (size, size).
              A sequence of length 2 is read as (height, width).
          scale (tuple, optional): Range [min, max) of the respective size of the original
              size to be cropped (default=(0.08, 1.0)).
          ratio (tuple, optional): Range [min, max) of the aspect ratio to be cropped
              (default=(3. / 4., 4. / 3.)).
          interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
              One of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

              - Inter.BILINEAR, bilinear interpolation.

              - Inter.NEAREST, nearest-neighbor interpolation.

              - Inter.BICUBIC, bicubic interpolation.

          max_attempts (int, optional): Maximum number of attempts to propose a valid
              crop_area (default=10). If exceeded, fall back to use center_crop instead.

      Examples:
          >>> from mindspore.dataset.vision import Inter
          >>> decode_op = c_vision.Decode()
          >>> resize_crop_op = c_vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5),
          ...                                             interpolation=Inter.BILINEAR)
          >>> transforms_list = [decode_op, resize_crop_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_resize_crop
      def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                   interpolation=Inter.BILINEAR, max_attempts=10):
          # An integer size is stored as a square (size, size) pair.
          self.size = (size, size) if isinstance(size, int) else size
          self.scale = scale
          self.ratio = ratio
          self.interpolation = interpolation
          self.max_attempts = max_attempts

      def parse(self):
          """
          Return the cde.RandomResizedCropOperation configured from this instance.
          """
          return cde.RandomResizedCropOperation(
              self.size,
              self.scale,
              self.ratio,
              DE_C_INTER_MODE[self.interpolation],
              self.max_attempts,
          )
  class RandomResizedCropWithBBox(ImageTensorOperation):
      """
      Crop the input image to a random size and aspect ratio, and adjust bounding boxes accordingly.

      Args:
          size (Union[int, sequence]): Size of the output image.
              An integer produces a square crop of size (size, size).
              A sequence of length 2 is read as (height, width).
          scale (tuple, optional): Range (min, max) of the respective size of the original
              size to be cropped (default=(0.08, 1.0)).
          ratio (tuple, optional): Range (min, max) of the aspect ratio to be cropped
              (default=(3. / 4., 4. / 3.)).
          interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
              One of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

              - Inter.BILINEAR, bilinear interpolation.

              - Inter.NEAREST, nearest-neighbor interpolation.

              - Inter.BICUBIC, bicubic interpolation.

          max_attempts (int, optional): Maximum number of attempts to propose a valid
              crop area (default=10). If exceeded, fall back to use center crop instead.

      Examples:
          >>> from mindspore.dataset.vision import Inter
          >>> decode_op = c_vision.Decode()
          >>> bbox_op = c_vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST)
          >>> transforms_list = [decode_op, bbox_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_resize_crop
      def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                   interpolation=Inter.BILINEAR, max_attempts=10):
          # An integer size is stored as a square (size, size) pair.
          self.size = (size, size) if isinstance(size, int) else size
          self.scale = scale
          self.ratio = ratio
          self.interpolation = interpolation
          self.max_attempts = max_attempts

      def parse(self):
          """
          Return the cde.RandomResizedCropWithBBoxOperation configured from this instance.
          """
          return cde.RandomResizedCropWithBBoxOperation(
              self.size,
              self.scale,
              self.ratio,
              DE_C_INTER_MODE[self.interpolation],
              self.max_attempts,
          )
  class RandomResize(ImageTensorOperation):
      """
      Tensor operation that resizes the input image using a randomly selected interpolation mode.

      Args:
          size (Union[int, sequence]): Output size of the resized image.
              For an integer, the smaller edge of the image is resized to this value while
              keeping the image aspect ratio.
              A sequence of length 2 is read as (height, width).

      Examples:
          >>> # randomly resize image, keeping aspect ratio
          >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResize(50)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
          ...                                                 input_columns=["image"])
          >>> # randomly resize image to landscape style
          >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResize((40, 60))]
          >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
          ...                                                     input_columns=["image"])
      """

      @check_resize
      def __init__(self, size):
          self.size = size

      def parse(self):
          """
          Return the cde.RandomResizeOperation; an integer size is passed as a 1-tuple.
          """
          target = (self.size,) if isinstance(self.size, int) else self.size
          return cde.RandomResizeOperation(target)
  class RandomResizeWithBBox(ImageTensorOperation):
      """
      Tensor operation that resizes the input image using a randomly selected interpolation mode
      and adjusts bounding boxes accordingly.

      Args:
          size (Union[int, sequence]): Output size of the resized image.
              For an integer, the smaller edge of the image is resized to this value while
              keeping the image aspect ratio.
              A sequence of length 2 is read as (height, width).

      Examples:
          >>> # randomly resize image with bounding boxes, keeping aspect ratio
          >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResizeWithBBox(60)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
          ...                                                 input_columns=["image"])
          >>> # randomly resize image with bounding boxes to portrait style
          >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResizeWithBBox((80, 60))]
          >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
          ...                                                     input_columns=["image"])
      """

      @check_resize
      def __init__(self, size):
          self.size = size

      def parse(self):
          """
          Return the cde.RandomResizeWithBBoxOperation; an integer size is passed as a 1-tuple.
          """
          target = (self.size,) if isinstance(self.size, int) else self.size
          return cde.RandomResizeWithBBoxOperation(target)
  class RandomRotation(ImageTensorOperation):
      """
      Rotate the input image by a random angle.

      Args:
          degrees (Union[int, float, sequence]): Range of random rotation degrees.
              A single number is converted to the range (-degrees, degrees).
              A sequence should be (min, max).
          resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
              If omitted, or if the image has mode "1" or "P", it is set to be Inter.NEAREST.
              One of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].

              - Inter.BILINEAR, bilinear interpolation.

              - Inter.NEAREST, nearest-neighbor interpolation.

              - Inter.BICUBIC, bicubic interpolation.

          expand (bool, optional): Optional expansion flag (default=False). If set to True, expand the
              output image to make it large enough to hold the entire rotated image.
              If set to False or omitted, make the output image the same size as the input.
              Note that the expand flag assumes rotation around the center and no translation.
          center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None).
              Origin is the top left corner. None sets to the center of the image.
          fill_value (Union[int, tuple], optional): Optional fill color for the area outside the
              rotated image (default=0).
              A 3-tuple is used for the R, G, B channels respectively.
              An integer is used for all RGB channels.

      Examples:
          >>> from mindspore.dataset.vision import Inter
          >>> transforms_list = [c_vision.Decode(),
          ...                    c_vision.RandomRotation(degrees=5.0,
          ...                                            resample=Inter.NEAREST,
          ...                                            expand=True)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_rotation
      def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
          self.degrees = degrees
          self.resample = resample
          self.expand = expand
          self.center = center
          self.fill_value = fill_value

      def parse(self):
          """
          Return the cde.RandomRotationOperation configured from this instance.
          """
          # A single number becomes the symmetric range (-degrees, degrees).
          angle_range = self.degrees
          if isinstance(angle_range, numbers.Number):
              angle_range = (-angle_range, angle_range)

          cpp_interpolation = DE_C_INTER_MODE[self.resample]

          # A missing center is passed on as (-1, -1).
          rotation_center = self.center
          if rotation_center is None:
              rotation_center = (-1, -1)

          # An integer fill value is replicated into a 3-tuple.
          fill = self.fill_value
          if isinstance(fill, int):
              fill = tuple([fill] * 3)

          return cde.RandomRotationOperation(angle_range, cpp_interpolation, self.expand, rotation_center, fill)
  class RandomSelectSubpolicy(ImageTensorOperation):
      """
      Choose a random sub-policy from a list and apply it to the input image. A sub-policy is a list of
      tuples (op, prob), where op is a TensorOp operation and prob is the probability that this op will be
      applied. Once a sub-policy is selected, each op within the sub-policy will be applied in sequence
      according to its probability.

      Args:
          policy (list(list(tuple(TensorOp, float)))): List of sub-policies to choose from.

      Examples:
          >>> policy = [[(c_vision.RandomRotation((45, 45)), 0.5),
          ...            (c_vision.RandomVerticalFlip(), 1),
          ...            (c_vision.RandomColorAdjust(), 0.8)],
          ...           [(c_vision.RandomRotation((90, 90)), 1),
          ...            (c_vision.RandomColorAdjust(), 0.2)]]
          >>> image_folder_dataset_1 = image_folder_dataset.map(operations=c_vision.RandomSelectSubpolicy(policy),
          ...                                                   input_columns=["image"])
      """

      @check_random_select_subpolicy_op
      def __init__(self, policy):
          self.policy = policy

      def parse(self):
          """
          Return a C++ representation of the operator for execution
          """

          def convert(op):
              # Ops that are truthy and expose a `parse` attribute are converted through it;
              # anything else is forwarded unchanged.
              if op and getattr(op, 'parse', None):
                  return op.parse()
              return op

          cpp_policy = [
              [(convert(entry[0]), entry[1]) for entry in sub_policy]
              for sub_policy in self.policy
          ]
          return cde.RandomSelectSubpolicyOperation(cpp_policy)
  class RandomSharpness(ImageTensorOperation):
      """
      Adjust the sharpness of the input image by a fixed or random degree. A degree of 0.0 gives a
      blurred image, a degree of 1.0 gives the original image, and a degree of 2.0 gives a sharpened image.

      Args:
          degrees (tuple, optional): Range of random sharpness adjustment degrees, in (min, max) format.
              If min=max, it is a single fixed magnitude operation (default = (0.1, 1.9)).

      Raises:
          TypeError : If degrees is not a list or tuple.
          ValueError: If degrees is negative.
          ValueError: If degrees is in (max, min) format instead of (min, max).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_positive_degrees
      def __init__(self, degrees=(0.1, 1.9)):
          self.degrees = degrees

      def parse(self):
          """
          Return the cde.RandomSharpnessOperation for the stored degree range.
          """
          degree_range = self.degrees
          return cde.RandomSharpnessOperation(degree_range)
  class RandomSolarize(ImageTensorOperation):
      """
      Invert all pixel values within the given range.

      Args:
          threshold (tuple, optional): Range of random solarize threshold. Threshold values should
              always be in the range (0, 255), include at least one integer value in the given range
              and be in (min, max) format. If min=max, then invert all pixel values above
              min(max) (default=(0, 255)).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_random_solarize
      def __init__(self, threshold=(0, 255)):
          self.threshold = threshold

      def parse(self):
          """
          Return the cde.RandomSolarizeOperation for the stored threshold range.
          """
          threshold_range = self.threshold
          return cde.RandomSolarizeOperation(threshold_range)
  class RandomVerticalFlip(ImageTensorOperation):
      """
      Flip the input image vertically, at random, with the given probability.

      Args:
          prob (float, optional): Probability that the image is flipped (default=0.5).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_prob
      def __init__(self, prob=0.5):
          self.prob = prob

      def parse(self):
          """
          Return the cde.RandomVerticalFlipOperation for the stored probability.
          """
          flip_probability = self.prob
          return cde.RandomVerticalFlipOperation(flip_probability)
  class RandomVerticalFlipWithBBox(ImageTensorOperation):
      """
      Flip the input image vertically, at random, with the given probability, and adjust
      bounding boxes accordingly.

      Args:
          prob (float, optional): Probability that the image is flipped (default=0.5).

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlipWithBBox(0.20)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_prob
      def __init__(self, prob=0.5):
          self.prob = prob

      def parse(self):
          """
          Return the cde.RandomVerticalFlipWithBBoxOperation for the stored probability.
          """
          flip_probability = self.prob
          return cde.RandomVerticalFlipWithBBoxOperation(flip_probability)
  class Rescale(ImageTensorOperation):
      """
      Tensor operation that rescales the input image.

      Args:
          rescale (float): Rescale factor.
          shift (float): Shift factor.

      Examples:
          >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_rescale
      def __init__(self, rescale, shift):
          self.rescale, self.shift = rescale, shift

      def parse(self):
          """
          Return the cde.RescaleOperation for the stored rescale and shift factors.
          """
          factors = (self.rescale, self.shift)
          return cde.RescaleOperation(*factors)
  class Resize(ImageTensorOperation):
      """
      Resize the input image to the given size.

      Args:
          size (Union[int, sequence]): Output size of the resized image.
              For an integer, the smaller edge of the image is resized to this value while
              keeping the image aspect ratio.
              A sequence of length 2 is read as (height, width).
          interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR).
              It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC].

              - Inter.LINEAR, bilinear interpolation.

              - Inter.NEAREST, nearest-neighbor interpolation.

              - Inter.BICUBIC, bicubic interpolation.

              - Inter.AREA, pixel area interpolation.

      Examples:
          >>> from mindspore.dataset.vision import Inter
          >>> decode_op = c_vision.Decode()
          >>> resize_op = c_vision.Resize([100, 75], Inter.BICUBIC)
          >>> transforms_list = [decode_op, resize_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_resize_interpolation
      def __init__(self, size, interpolation=Inter.LINEAR):
          # An integer size is stored as a 1-tuple.
          self.size = (size,) if isinstance(size, int) else size
          self.interpolation = interpolation

      def parse(self):
          """
          Return the cde.ResizeOperation for the stored size and interpolation mode.
          """
          cpp_interpolation = DE_C_INTER_MODE[self.interpolation]
          return cde.ResizeOperation(self.size, cpp_interpolation)
  class ResizeWithBBox(ImageTensorOperation):
      """
      Resize the input image to the given size and adjust bounding boxes accordingly.

      Args:
          size (Union[int, sequence]): Output size of the resized image.
              For an integer, the smaller edge of the image is resized to this value while
              keeping the image aspect ratio.
              A sequence of length 2 is read as (height, width).
          interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR).
              It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC].

              - Inter.LINEAR, bilinear interpolation.

              - Inter.NEAREST, nearest-neighbor interpolation.

              - Inter.BICUBIC, bicubic interpolation.

      Examples:
          >>> from mindspore.dataset.vision import Inter
          >>> decode_op = c_vision.Decode()
          >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST)
          >>> transforms_list = [decode_op, bbox_op]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
          ...                                                 input_columns=["image"])
      """

      @check_resize_interpolation
      def __init__(self, size, interpolation=Inter.LINEAR):
          self.size = size
          self.interpolation = interpolation

      def parse(self):
          """
          Return the cde.ResizeWithBBoxOperation; an integer size is passed as a 1-tuple.
          """
          target = (self.size,) if isinstance(self.size, int) else self.size
          return cde.ResizeWithBBoxOperation(target, DE_C_INTER_MODE[self.interpolation])
  class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation):
      """
      Tensor operation that decodes, randomly crops and resizes a JPEG image using the simulation
      algorithm of the Ascend series chip DVPP module.

      The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
      The input image size should be in range [32*32, 8192*8192].
      The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
      Only images with an even resolution can be output. The output of odd resolution is not supported.

      Args:
          size (Union[int, sequence]): Size of the output image.
              An integer produces a square crop of size (size, size).
              A sequence of length 2 is read as (height, width).
          scale (tuple, optional): Range [min, max) of the respective size of the
              original size to be cropped (default=(0.08, 1.0)).
          ratio (tuple, optional): Range [min, max) of the aspect ratio to be
              cropped (default=(3. / 4., 4. / 3.)).
          max_attempts (int, optional): Maximum number of attempts to propose a valid crop_area (default=10).
              If exceeded, fall back to use center_crop instead.

      Examples:
          >>> # decode, randomly crop and resize image, keeping aspect ratio
          >>> transforms_list1 = [c_vision.Decode(), c_vision.SoftDvppDecodeRandomCropResizeJpeg(90)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
          ...                                                 input_columns=["image"])
          >>> # decode, randomly crop and resize to landscape style
          >>> transforms_list2 = [c_vision.Decode(), c_vision.SoftDvppDecodeRandomCropResizeJpeg((80, 100))]
          >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
          ...                                                     input_columns=["image"])
      """

      @check_soft_dvpp_decode_random_crop_resize_jpeg
      def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), max_attempts=10):
          # An integer size is stored as a square (size, size) pair.
          self.size = (size, size) if isinstance(size, int) else size
          self.scale = scale
          self.ratio = ratio
          self.max_attempts = max_attempts

      def parse(self):
          """
          Return the cde.SoftDvppDecodeRandomCropResizeJpegOperation configured from this instance.
          """
          crop_args = (self.size, self.scale, self.ratio, self.max_attempts)
          return cde.SoftDvppDecodeRandomCropResizeJpegOperation(*crop_args)
  class SoftDvppDecodeResizeJpeg(ImageTensorOperation):
      """
      Tensor operation that decodes and resizes a JPEG image using the simulation algorithm of the
      Ascend series chip DVPP module.

      It is recommended to use this algorithm in the following scenarios:
      When training, the DVPP of the Ascend chip is not used,
      and the DVPP of the Ascend chip is used during inference,
      and the accuracy of inference is lower than the accuracy of training;
      and the input image size should be in range [32*32, 8192*8192].
      The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
      Only images with an even resolution can be output. The output of odd resolution is not supported.

      Args:
          size (Union[int, sequence]): Output size of the resized image.
              For an integer, the smaller edge of the image is resized to this value while
              keeping the image aspect ratio.
              A sequence of length 2 is read as (height, width).

      Examples:
          >>> # decode and resize image, keeping aspect ratio
          >>> transforms_list1 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg(70)]
          >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
          ...                                                 input_columns=["image"])
          >>> # decode and resize to portrait style
          >>> transforms_list2 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg((80, 60))]
          >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
          ...                                                     input_columns=["image"])
      """

      @check_resize
      def __init__(self, size):
          # An integer size is stored as a 1-tuple.
          self.size = (size,) if isinstance(size, int) else size

      def parse(self):
          """
          Return the cde.SoftDvppDecodeResizeJpegOperation for the stored size.
          """
          target = self.size
          return cde.SoftDvppDecodeResizeJpegOperation(target)
  class UniformAugment(ImageTensorOperation):
      """
      Tensor operation that applies augmentations picked at random from a given list.

      Args:
          transforms: List of C++ operations (Python operations are not accepted).
          num_ops (int, optional): Number of operations to be selected and applied (default=2).

      Examples:
          >>> import mindspore.dataset.vision.py_transforms as py_vision
          >>> transforms_list = [c_vision.RandomHorizontalFlip(),
          ...                    c_vision.RandomVerticalFlip(),
          ...                    c_vision.RandomColorAdjust(),
          ...                    c_vision.RandomRotation(degrees=45)]
          >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2)
          >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]),
          ...                   uni_aug_op, py_vision.ToTensor()]
          >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all,
          ...                                                   input_columns="image",
          ...                                                   num_parallel_workers=1)
      """

      @check_uniform_augment_cpp
      def __init__(self, transforms, num_ops=2):
          """
          Store the candidate operations and the number of them to apply.

          Args:
              transforms: List of candidate operations.
              num_ops (int, optional): Number of operations to be selected and applied (default=2).
          """
          self.transforms, self.num_ops = transforms, num_ops

      def parse(self):
          """
          Build the backend operation from the stored candidates.

          Every truthy candidate that exposes a truthy ``parse`` attribute is replaced by the
          result of calling it; all other candidates are forwarded unchanged.

          Returns:
              The object produced by cde.UniformAugOperation for the converted list and num_ops.
          """
          converted = [
              candidate.parse() if candidate and getattr(candidate, 'parse', None) else candidate
              for candidate in self.transforms
          ]
          return cde.UniformAugOperation(converted, self.num_ops)