You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

c_transforms.py 91 kB

5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037
  1. # Copyright 2019-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. The module vision.c_transforms is inherited from _c_dataengine
  17. and is implemented based on OpenCV in C++. It's a high performance module to
  18. process images. Users can apply suitable augmentations on image data
  19. to improve their training models.
  20. .. Note::
  21. A constructor's arguments for every class in this module must be saved into the
  22. class attributes (self.xxx) to support save() and load().
  23. Examples:
  24. >>> from mindspore.dataset.vision import Border, Inter
  25. >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
  26. >>> # create a dataset that reads all files in dataset_dir with 8 threads
  27. >>> image_folder_dataset = ds.ImageFolderDataset(image_folder_dataset_dir,
  28. ... num_parallel_workers=8)
  29. >>> # create a list of transformations to be applied to the image data
  30. >>> transforms_list = [c_vision.Decode(),
  31. ... c_vision.Resize((256, 256), interpolation=Inter.LINEAR),
  32. ... c_vision.RandomCrop(200, padding_mode=Border.EDGE),
  33. ... c_vision.RandomRotation((0, 15)),
  34. ... c_vision.Normalize((100, 115.0, 121.0), (71.0, 68.0, 70.0)),
  35. ... c_vision.HWC2CHW()]
  36. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  37. >>> # apply the transformation to the dataset through data1.map()
  38. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  39. ... input_columns="image")
  40. >>> image_folder_dataset = image_folder_dataset.map(operations=onehot_op,
  41. ... input_columns="label")
  42. """
  43. import numbers
  44. import numpy as np
  45. from PIL import Image
  46. import mindspore._c_dataengine as cde
  47. from .utils import Inter, Border, ImageBatchFormat, ConvertMode, SliceMode, AutoAugmentPolicy
  48. from .validators import check_prob, check_crop, check_center_crop, check_resize_interpolation, \
  49. check_mix_up_batch_c, check_normalize_c, check_normalizepad_c, check_random_crop, check_random_color_adjust, \
  50. check_random_rotation, check_range, check_resize, check_rescale, check_pad, check_cutout, check_alpha, \
  51. check_uniform_augment_cpp, check_convert_color, check_random_resize_crop, check_random_auto_contrast, \
  52. check_random_adjust_sharpness, check_auto_augment, \
  53. check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \
  54. check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \
  55. check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches, check_adjust_gamma
  56. from ..transforms.c_transforms import TensorOperation
  57. class ImageTensorOperation(TensorOperation):
  58. """
  59. Base class of Image Tensor Ops
  60. """
  61. def __call__(self, *input_tensor_list):
  62. for tensor in input_tensor_list:
  63. if not isinstance(tensor, (np.ndarray, Image.Image)):
  64. raise TypeError(
  65. "Input should be NumPy or PIL image, got {}.".format(type(tensor)))
  66. return super().__call__(*input_tensor_list)
  67. def parse(self):
  68. raise NotImplementedError(
  69. "ImageTensorOperation has to implement parse() method.")
# Mapping from the Python-side AutoAugmentPolicy enum to the C++ dataengine enum.
DE_C_AUTO_AUGMENT_POLICY = {AutoAugmentPolicy.IMAGENET: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_IMAGENET,
                            AutoAugmentPolicy.CIFAR10: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_CIFAR10,
                            AutoAugmentPolicy.SVHN: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_SVHN}

# Mapping from the Python-side Border enum to the C++ border/padding type.
DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

# Mapping from the Python-side ImageBatchFormat enum to the C++ batch layout enum.
DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                           ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

# Mapping from the Python-side Inter enum to the C++ interpolation mode.
DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                   Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                   Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
                   Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
                   Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC}

# Mapping from the Python-side SliceMode enum to the C++ slice mode.
DE_C_SLICE_MODE = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
                   SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP}

# Mapping from the Python-side ConvertMode enum to the C++ color-conversion code.
DE_C_CONVERTCOLOR_MODE = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
                          ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
                          ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
                          ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
                          ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
                          ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
                          ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
                          ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
                          ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
                          ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
                          ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
                          ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
                          ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
                          ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
                          ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
                          ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
                          ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
                          ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
                          ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
                          ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
                          }
  107. def parse_padding(padding):
  108. """ Parses and prepares the padding tuple"""
  109. if isinstance(padding, numbers.Number):
  110. padding = [padding] * 4
  111. if len(padding) == 2:
  112. left = top = padding[0]
  113. right = bottom = padding[1]
  114. padding = (left, top, right, bottom,)
  115. if isinstance(padding, list):
  116. padding = tuple(padding)
  117. return padding
  118. class AdjustGamma(ImageTensorOperation):
  119. r"""
  120. Apply gamma correction on input image. Input image is expected to be in [..., H, W, C] or [H, W] format.
  121. .. math::
  122. I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
  123. See `Gamma Correction`_ for more details.
  124. .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
  125. Args:
  126. gamma (float): Non negative real number.
  127. The output image pixel value is exponentially related to the input image pixel value.
  128. gamma larger than 1 make the shadows darker,
  129. while gamma smaller than 1 make dark regions lighter.
  130. gain (float, optional): The constant multiplier (default=1).
  131. Examples:
  132. >>> transforms_list = [c_vision.Decode(), c_vision.AdjustGamma(gamma=10.0, gain=1.0)]
  133. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  134. ... input_columns=["image"])
  135. """
  136. @check_adjust_gamma
  137. def __init__(self, gamma, gain=1):
  138. self.gamma = gamma
  139. self.gain = gain
  140. def parse(self):
  141. return cde.AdjustGammaOperation(self.gamma, self.gain)
  142. class AutoAugment(ImageTensorOperation):
  143. """
  144. Apply AutoAugment data augmentation method based on
  145. `AutoAugment: Learning Augmentation Strategies from Data <https://arxiv.org/pdf/1805.09501.pdf>`_.
  146. This operation works only with 3-channel RGB images.
  147. Args:
  148. policy (AutoAugmentPolicy, optional): AutoAugment policies learned on different datasets
  149. (default=AutoAugmentPolicy.IMAGENET).
  150. It can be any of [AutoAugmentPolicy.IMAGENET, AutoAugmentPolicy.CIFAR10, AutoAugmentPolicy.SVHN].
  151. Randomly apply 2 operations from a candidate set. See auto augmentation details in AutoAugmentPolicy.
  152. - AutoAugmentPolicy.IMAGENET, means to apply AutoAugment learned on ImageNet dataset.
  153. - AutoAugmentPolicy.CIFAR10, means to apply AutoAugment learned on Cifar10 dataset.
  154. - AutoAugmentPolicy.SVHN, means to apply AutoAugment learned on SVHN dataset.
  155. interpolation (Inter, optional): Image interpolation mode for Resize operator (default=Inter.NEAREST).
  156. It can be any of [Inter.NEAREST, Inter.BILINEAR, Inter.BICUBIC, Inter.AREA].
  157. - Inter.NEAREST: means interpolation method is nearest-neighbor interpolation.
  158. - Inter.BILINEAR: means interpolation method is bilinear interpolation.
  159. - Inter.BICUBIC: means the interpolation method is bicubic interpolation.
  160. - Inter.AREA: means the interpolation method is area interpolation.
  161. fill_value (Union[int, tuple], optional): Pixel fill value for the area outside the transformed image.
  162. It can be an int or a 3-tuple. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  163. If it is an integer, it is used for all RGB channels. The fill_value values must be in range [0, 255]
  164. (default=0).
  165. Examples:
  166. >>> from mindspore.dataset.vision import AutoAugmentPolicy, Inter
  167. >>> transforms_list = [c_vision.Decode(), c_vision.AutoAugment(policy=AutoAugmentPolicy.IMAGENET,
  168. ... interpolation=Inter.NEAREST,
  169. ... fill_value=0)]
  170. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  171. ... input_columns=["image"])
  172. """
  173. @check_auto_augment
  174. def __init__(self, policy=AutoAugmentPolicy.IMAGENET, interpolation=Inter.NEAREST, fill_value=0):
  175. self.policy = policy
  176. self.interpolation = interpolation
  177. if isinstance(fill_value, int):
  178. fill_value = tuple([fill_value] * 3)
  179. self.fill_value = fill_value
  180. def parse(self):
  181. return cde.AutoAugmentOperation(DE_C_AUTO_AUGMENT_POLICY[self.policy], DE_C_INTER_MODE[self.interpolation],
  182. self.fill_value)
  183. class AutoContrast(ImageTensorOperation):
  184. """
  185. Apply automatic contrast on input image. This operator calculates histogram of image, reassign cutoff percent
  186. of lightest pixels from histogram to 255, and reassign cutoff percent of darkest pixels from histogram to 0.
  187. Args:
  188. cutoff (float, optional): Percent of lightest and darkest pixels to cut off from
  189. the histogram of input image. the value must be in the range [0.0, 50.0) (default=0.0).
  190. ignore (Union[int, sequence], optional): The background pixel values to ignore (default=None).
  191. Examples:
  192. >>> transforms_list = [c_vision.Decode(), c_vision.AutoContrast(cutoff=10.0, ignore=[10, 20])]
  193. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  194. ... input_columns=["image"])
  195. """
  196. @check_auto_contrast
  197. def __init__(self, cutoff=0.0, ignore=None):
  198. if ignore is None:
  199. ignore = []
  200. if isinstance(ignore, int):
  201. ignore = [ignore]
  202. self.cutoff = cutoff
  203. self.ignore = ignore
  204. def parse(self):
  205. return cde.AutoContrastOperation(self.cutoff, self.ignore)
  206. class BoundingBoxAugment(ImageTensorOperation):
  207. """
  208. Apply a given image transform on a random selection of bounding box regions of a given image.
  209. Args:
  210. transform: C++ transformation operator to be applied on random selection
  211. of bounding box regions of a given image.
  212. ratio (float, optional): Ratio of bounding boxes to apply augmentation on.
  213. Range: [0, 1] (default=0.3).
  214. Examples:
  215. >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes
  216. >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)
  217. >>> # map to apply ops
  218. >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op],
  219. ... input_columns=["image", "bbox"],
  220. ... output_columns=["image", "bbox"],
  221. ... column_order=["image", "bbox"])
  222. """
  223. @check_bounding_box_augment_cpp
  224. def __init__(self, transform, ratio=0.3):
  225. self.ratio = ratio
  226. self.transform = transform
  227. def parse(self):
  228. if self.transform and getattr(self.transform, 'parse', None):
  229. transform = self.transform.parse()
  230. else:
  231. transform = self.transform
  232. return cde.BoundingBoxAugmentOperation(transform, self.ratio)
  233. class CenterCrop(ImageTensorOperation):
  234. """
  235. Crop the input image at the center to the given size. If input image size is smaller than output size,
  236. input image will be padded with 0 before cropping.
  237. Args:
  238. size (Union[int, sequence]): The output size of the cropped image.
  239. If size is an integer, a square crop of size (size, size) is returned.
  240. If size is a sequence of length 2, it should be (height, width).
  241. Examples:
  242. >>> # crop image to a square
  243. >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)]
  244. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
  245. ... input_columns=["image"])
  246. >>> # crop image to portrait style
  247. >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))]
  248. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
  249. ... input_columns=["image"])
  250. """
  251. @check_center_crop
  252. def __init__(self, size):
  253. if isinstance(size, int):
  254. size = (size, size)
  255. self.size = size
  256. def parse(self):
  257. return cde.CenterCropOperation(self.size)
  258. class ConvertColor(ImageTensorOperation):
  259. """
  260. Change the color space of the image.
  261. Args:
  262. convert_mode (ConvertMode): The mode of image channel conversion.
  263. - ConvertMode.COLOR_BGR2BGRA, Add alpha channel to BGR image.
  264. - ConvertMode.COLOR_RGB2RGBA, Add alpha channel to RGB image.
  265. - ConvertMode.COLOR_BGRA2BGR, Remove alpha channel to BGR image.
  266. - ConvertMode.COLOR_RGBA2RGB, Remove alpha channel to RGB image.
  267. - ConvertMode.COLOR_BGR2RGBA, Convert BGR image to RGBA image.
  268. - ConvertMode.COLOR_RGB2BGRA, Convert RGB image to BGRA image.
  269. - ConvertMode.COLOR_RGBA2BGR, Convert RGBA image to BGR image.
  270. - ConvertMode.COLOR_BGRA2RGB, Convert BGRA image to RGB image.
  271. - ConvertMode.COLOR_BGR2RGB, Convert BGR image to RGB image.
  272. - ConvertMode.COLOR_RGB2BGR, Convert RGB image to BGR image.
  273. - ConvertMode.COLOR_BGRA2RGBA, Convert BGRA image to RGBA image.
  274. - ConvertMode.COLOR_RGBA2BGRA, Convert RGBA image to BGRA image.
  275. - ConvertMode.COLOR_BGR2GRAY, Convert BGR image to GRAY image.
  276. - ConvertMode.COLOR_RGB2GRAY, Convert RGB image to GRAY image.
  277. - ConvertMode.COLOR_GRAY2BGR, Convert GRAY image to BGR image.
  278. - ConvertMode.COLOR_GRAY2RGB, Convert GRAY image to RGB image.
  279. - ConvertMode.COLOR_GRAY2BGRA, Convert GRAY image to BGRA image.
  280. - ConvertMode.COLOR_GRAY2RGBA, Convert GRAY image to RGBA image.
  281. - ConvertMode.COLOR_BGRA2GRAY, Convert BGRA image to GRAY image.
  282. - ConvertMode.COLOR_RGBA2GRAY, Convert RGBA image to GRAY image.
  283. Examples:
  284. >>> import mindspore.dataset.vision.utils as mode
  285. >>> # Convert RGB images to GRAY images
  286. >>> convert_op = c_vision.ConvertColor(mode.ConvertMode.COLOR_RGB2GRAY)
  287. >>> image_folder_dataset = image_folder_dataset.map(operations=convert_op,
  288. ... input_columns=["image"])
  289. >>> # Convert RGB images to BGR images
  290. >>> convert_op = c_vision.ConvertColor(mode.ConvertMode.COLOR_RGB2BGR)
  291. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=convert_op,
  292. ... input_columns=["image"])
  293. """
  294. @check_convert_color
  295. def __init__(self, convert_mode):
  296. self.convert_mode = convert_mode
  297. def parse(self):
  298. return cde.ConvertColorOperation(DE_C_CONVERTCOLOR_MODE[self.convert_mode])
  299. class Crop(ImageTensorOperation):
  300. """
  301. Crop the input image at a specific location.
  302. Args:
  303. coordinates(sequence): Coordinates of the upper left corner of the cropping image. Must be a sequence of two
  304. values, in the form of (top, left).
  305. size (Union[int, sequence]): The output size of the cropped image.
  306. If size is an integer, a square crop of size (size, size) is returned.
  307. If size is a sequence of length 2, it should be (height, width).
  308. Examples:
  309. >>> decode_op = c_vision.Decode()
  310. >>> crop_op = c_vision.Crop((0, 0), 32)
  311. >>> transforms_list = [decode_op, crop_op]
  312. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  313. ... input_columns=["image"])
  314. """
  315. @check_crop
  316. def __init__(self, coordinates, size):
  317. if isinstance(size, int):
  318. size = (size, size)
  319. self.coordinates = coordinates
  320. self.size = size
  321. def parse(self):
  322. return cde.CropOperation(self.coordinates, self.size)
  323. class CutMixBatch(ImageTensorOperation):
  324. """
  325. Apply CutMix transformation on input batch of images and labels.
  326. Note that you need to make labels into one-hot format and batched before calling this operator.
  327. Args:
  328. image_batch_format (Image Batch Format): The method of padding. Can be any of
  329. [ImageBatchFormat.NHWC, ImageBatchFormat.NCHW].
  330. alpha (float, optional): hyperparameter of beta distribution (default = 1.0).
  331. prob (float, optional): The probability by which CutMix is applied to each image (default = 1.0).
  332. Examples:
  333. >>> from mindspore.dataset.vision import ImageBatchFormat
  334. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  335. >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op,
  336. ... input_columns=["label"])
  337. >>> cutmix_batch_op = c_vision.CutMixBatch(ImageBatchFormat.NHWC, 1.0, 0.5)
  338. >>> image_folder_dataset = image_folder_dataset.batch(5)
  339. >>> image_folder_dataset = image_folder_dataset.map(operations=cutmix_batch_op,
  340. ... input_columns=["image", "label"])
  341. """
  342. @check_cut_mix_batch_c
  343. def __init__(self, image_batch_format, alpha=1.0, prob=1.0):
  344. self.image_batch_format = image_batch_format.value
  345. self.alpha = alpha
  346. self.prob = prob
  347. def parse(self):
  348. return cde.CutMixBatchOperation(DE_C_IMAGE_BATCH_FORMAT[self.image_batch_format], self.alpha, self.prob)
  349. class CutOut(ImageTensorOperation):
  350. """
  351. Randomly cut (mask) out a given number of square patches from the input image array.
  352. Args:
  353. length (int): The side length of each square patch.
  354. num_patches (int, optional): Number of patches to be cut out of an image (default=1).
  355. Examples:
  356. >>> transforms_list = [c_vision.Decode(), c_vision.CutOut(80, num_patches=10)]
  357. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  358. ... input_columns=["image"])
  359. """
  360. @check_cutout
  361. def __init__(self, length, num_patches=1):
  362. self.length = length
  363. self.num_patches = num_patches
  364. def parse(self):
  365. return cde.CutOutOperation(self.length, self.num_patches)
  366. class Decode(ImageTensorOperation):
  367. """
  368. Decode the input image in RGB mode(default) or BGR mode(deprecated).
  369. Args:
  370. rgb (bool, optional): Mode of decoding input image (default=True).
  371. If True means format of decoded image is RGB else BGR(deprecated).
  372. Examples:
  373. >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip()]
  374. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  375. ... input_columns=["image"])
  376. """
  377. def __init__(self, rgb=True):
  378. self.rgb = rgb
  379. def __call__(self, img):
  380. """
  381. Call method.
  382. Args:
  383. img (NumPy): Image to be decoded.
  384. Returns:
  385. img (NumPy), Decoded image.
  386. """
  387. if isinstance(img, bytes):
  388. img = np.frombuffer(img, np.uint8)
  389. elif not isinstance(img, np.ndarray) or img.ndim != 1 or img.dtype.type is np.str_:
  390. raise TypeError(
  391. "Input should be an encoded image in 1-D NumPy format, got {}.".format(type(img)))
  392. return super().__call__(img)
  393. def parse(self):
  394. return cde.DecodeOperation(self.rgb)
  395. class Equalize(ImageTensorOperation):
  396. """
  397. Apply histogram equalization on input image.
  398. Examples:
  399. >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()]
  400. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  401. ... input_columns=["image"])
  402. """
  403. def parse(self):
  404. return cde.EqualizeOperation()
  405. class GaussianBlur(ImageTensorOperation):
  406. """
  407. Blur input image with the specified Gaussian kernel.
  408. Args:
  409. kernel_size (Union[int, sequence]): Size of the Gaussian kernel to use. The value must be positive and odd. If
  410. only an integer is provided, the kernel size will be (size, size). If a sequence of integer is provided, it
  411. must be a sequence of 2 values which represents (width, height).
  412. sigma (Union[float, sequence], optional): Standard deviation of the Gaussian kernel to use (default=None). The
  413. value must be positive. If only a float is provided, the sigma will be (sigma, sigma). If a sequence of
  414. float is provided, it must be a sequence of 2 values which represents the sigma of width and height. If None
  415. is provided, the sigma will be calculated as ((kernel_size - 1) * 0.5 - 1) * 0.3 + 0.8.
  416. Examples:
  417. >>> transforms_list = [c_vision.Decode(), c_vision.GaussianBlur(3, 3)]
  418. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  419. ... input_columns=["image"])
  420. """
  421. @check_gaussian_blur
  422. def __init__(self, kernel_size, sigma=None):
  423. if isinstance(kernel_size, int):
  424. kernel_size = (kernel_size,)
  425. if sigma is None:
  426. sigma = (0,)
  427. elif isinstance(sigma, (int, float)):
  428. sigma = (float(sigma),)
  429. self.kernel_size = kernel_size
  430. self.sigma = sigma
  431. def parse(self):
  432. return cde.GaussianBlurOperation(self.kernel_size, self.sigma)
  433. class HorizontalFlip(ImageTensorOperation):
  434. """
  435. Flip the input image horizontally.
  436. Examples:
  437. >>> transforms_list = [c_vision.Decode(), c_vision.HorizontalFlip()]
  438. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  439. ... input_columns=["image"])
  440. """
  441. def parse(self):
  442. return cde.HorizontalFlipOperation()
  443. class HWC2CHW(ImageTensorOperation):
  444. """
  445. Transpose the input image from shape (H, W, C) to shape (C, H, W). The input image should be 3 channels image.
  446. Examples:
  447. >>> transforms_list = [c_vision.Decode(),
  448. ... c_vision.RandomHorizontalFlip(0.75),
  449. ... c_vision.RandomCrop(512),
  450. ... c_vision.HWC2CHW()]
  451. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  452. ... input_columns=["image"])
  453. """
  454. def parse(self):
  455. return cde.HwcToChwOperation()
  456. class Invert(ImageTensorOperation):
  457. """
  458. Apply invert on input image in RGB mode. This operator will reassign every pixel to (255 - pixel).
  459. Examples:
  460. >>> transforms_list = [c_vision.Decode(), c_vision.Invert()]
  461. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  462. ... input_columns=["image"])
  463. """
  464. def parse(self):
  465. return cde.InvertOperation()
  466. class MixUpBatch(ImageTensorOperation):
  467. """
  468. Apply MixUp transformation on input batch of images and labels. Each image is
  469. multiplied by a random weight (lambda) and then added to a randomly selected image from the batch
  470. multiplied by (1 - lambda). The same formula is also applied to the one-hot labels.
  471. The lambda is generated based on the specified alpha value. Two coefficients x1, x2 are randomly generated
  472. in the range [alpha, 1], and lambda = (x1 / (x1 + x2)).
  473. Note that you need to make labels into one-hot format and batched before calling this operator.
  474. Args:
  475. alpha (float, optional): Hyperparameter of beta distribution (default = 1.0).
  476. Examples:
  477. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  478. >>> image_folder_dataset= image_folder_dataset.map(operations=onehot_op,
  479. ... input_columns=["label"])
  480. >>> mixup_batch_op = c_vision.MixUpBatch(alpha=0.9)
  481. >>> image_folder_dataset = image_folder_dataset.batch(5)
  482. >>> image_folder_dataset = image_folder_dataset.map(operations=mixup_batch_op,
  483. ... input_columns=["image", "label"])
  484. """
  485. @check_mix_up_batch_c
  486. def __init__(self, alpha=1.0):
  487. self.alpha = alpha
  488. def parse(self):
  489. return cde.MixUpBatchOperation(self.alpha)
  490. class Normalize(ImageTensorOperation):
  491. """
  492. Normalize the input image with respect to mean and standard deviation. This operator will normalize
  493. the input image with: output[channel] = (input[channel] - mean[channel]) / std[channel], where channel >= 1.
  494. Args:
  495. mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
  496. The mean values must be in range [0.0, 255.0].
  497. std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
  498. The standard deviation values must be in range (0.0, 255.0].
  499. Examples:
  500. >>> decode_op = c_vision.Decode()
  501. >>> normalize_op = c_vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0])
  502. >>> transforms_list = [decode_op, normalize_op]
  503. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  504. ... input_columns=["image"])
  505. """
  506. @check_normalize_c
  507. def __init__(self, mean, std):
  508. self.mean = mean
  509. self.std = std
  510. def parse(self):
  511. return cde.NormalizeOperation(self.mean, self.std)
  512. class NormalizePad(ImageTensorOperation):
  513. """
  514. Normalize the input image with respect to mean and standard deviation then pad an extra channel with value zero.
  515. Args:
  516. mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
  517. The mean values must be in range (0.0, 255.0].
  518. std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
  519. The standard deviation values must be in range (0.0, 255.0].
  520. dtype (str): Set the output data type of normalized image (default is "float32").
  521. Examples:
  522. >>> decode_op = c_vision.Decode()
  523. >>> normalize_pad_op = c_vision.NormalizePad(mean=[121.0, 115.0, 100.0],
  524. ... std=[70.0, 68.0, 71.0],
  525. ... dtype="float32")
  526. >>> transforms_list = [decode_op, normalize_pad_op]
  527. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  528. ... input_columns=["image"])
  529. """
  530. @check_normalizepad_c
  531. def __init__(self, mean, std, dtype="float32"):
  532. self.mean = mean
  533. self.std = std
  534. self.dtype = dtype
  535. def parse(self):
  536. return cde.NormalizePadOperation(self.mean, self.std, self.dtype)
  537. class Pad(ImageTensorOperation):
  538. """
  539. Pad the image according to padding parameters.
  540. Args:
  541. padding (Union[int, sequence]): The number of pixels to pad the image.
  542. If a single number is provided, it pads all borders with this value.
  543. If a tuple or lists of 2 values are provided, it pads the (left and top)
  544. with the first value and (right and bottom) with the second value.
  545. If 4 values are provided as a list or tuple,
  546. it pads the left, top, right and bottom respectively.
  547. fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
  548. padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  549. If it is an integer, it is used for all RGB channels.
  550. The fill_value values must be in range [0, 255] (default=0).
  551. padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of
  552. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].
  553. - Border.CONSTANT, means it fills the border with constant values.
  554. - Border.EDGE, means it pads with the last value on the edge.
  555. - Border.REFLECT, means it reflects the values on the edge omitting the last
  556. value of edge.
  557. - Border.SYMMETRIC, means it reflects the values on the edge repeating the last
  558. value of edge.
  559. Examples:
  560. >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])]
  561. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  562. ... input_columns=["image"])
  563. """
  564. @check_pad
  565. def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT):
  566. padding = parse_padding(padding)
  567. if isinstance(fill_value, int):
  568. fill_value = tuple([fill_value] * 3)
  569. self.padding = padding
  570. self.fill_value = fill_value
  571. self.padding_mode = padding_mode
  572. def parse(self):
  573. return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode])
  574. class RandomAdjustSharpness(ImageTensorOperation):
  575. """
  576. Randomly adjust the sharpness of the input image with a given probability.
  577. Args:
  578. degree (float): Sharpness adjustment degree, which must be non negative.
  579. Degree of 0.0 gives a blurred image, degree of 1.0 gives the original image,
  580. and degree of 2.0 increases the sharpness by a factor of 2.
  581. prob (float, optional): Probability of the image being sharpness adjusted, which
  582. must be in range of [0, 1] (default=0.5).
  583. Examples:
  584. >>> transforms_list = [c_vision.Decode(), c_vision.RandomAdjustSharpness(2.0, 0.5)]
  585. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  586. ... input_columns=["image"])
  587. """
  588. @check_random_adjust_sharpness
  589. def __init__(self, degree, prob=0.5):
  590. self.prob = prob
  591. self.degree = degree
  592. def parse(self):
  593. return cde.RandomAdjustSharpnessOperation(self.degree, self.prob)
  594. class RandomAffine(ImageTensorOperation):
  595. """
  596. Apply Random affine transformation to the input image.
  597. Args:
  598. degrees (int or float or sequence): Range of the rotation degrees.
  599. If `degrees` is a number, the range will be (-degrees, degrees).
  600. If `degrees` is a sequence, it should be (min, max).
  601. translate (sequence, optional): Sequence (tx_min, tx_max, ty_min, ty_max) of minimum/maximum translation in
  602. x(horizontal) and y(vertical) directions (default=None).
  603. The horizontal and vertical shift is selected randomly from the range:
  604. (tx_min*width, tx_max*width) and (ty_min*height, ty_max*height), respectively.
  605. If a tuple or list of size 2, then a translate parallel to the X axis in the range of
  606. (translate[0], translate[1]) is applied.
  607. If a tuple of list of size 4, then a translate parallel to the X axis in the range of
  608. (translate[0], translate[1]) and a translate parallel to the Y axis in the range of
  609. (translate[2], translate[3]) are applied.
  610. If None, no translation is applied.
  611. scale (sequence, optional): Scaling factor interval (default=None, original scale is used).
  612. shear (int or float or sequence, optional): Range of shear factor (default=None).
  613. If a number, then a shear parallel to the X axis in the range of (-shear, +shear) is applied.
  614. If a tuple or list of size 2, then a shear parallel to the X axis in the range of (shear[0], shear[1])
  615. is applied.
  616. If a tuple of list of size 4, then a shear parallel to X axis in the range of (shear[0], shear[1])
  617. and a shear parallel to Y axis in the range of (shear[2], shear[3]) is applied.
  618. If None, no shear is applied.
  619. resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
  620. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
  621. - Inter.BILINEAR, means resample method is bilinear interpolation.
  622. - Inter.NEAREST, means resample method is nearest-neighbor interpolation.
  623. - Inter.BICUBIC, means resample method is bicubic interpolation.
  624. fill_value (tuple or int, optional): Optional fill_value to fill the area outside the transform
  625. in the output image. There must be three elements in tuple and the value of single element is [0, 255].
  626. (default=0, filling is performed).
  627. Raises:
  628. ValueError: If `degrees` is negative.
  629. ValueError: If translation value is not between -1 and 1.
  630. ValueError: If scale is not positive.
  631. ValueError: If shear is a number but is not positive.
  632. TypeError: If `degrees` is not a number or a list or a tuple.
  633. If `degrees` is a list or tuple, its length is not 2.
  634. TypeError: If translate is specified but is not list or a tuple of length 2 or 4.
  635. TypeError: If scale is not a list or tuple of length 2.
  636. TypeError: If shear is not a list or tuple of length 2 or 4.
  637. TypeError: If fill_value is not a single integer or a 3-tuple.
  638. Examples:
  639. >>> from mindspore.dataset.vision import Inter
  640. >>> decode_op = c_vision.Decode()
  641. >>> random_affine_op = c_vision.RandomAffine(degrees=15,
  642. ... translate=(-0.1, 0.1, 0, 0),
  643. ... scale=(0.9, 1.1),
  644. ... resample=Inter.NEAREST)
  645. >>> transforms_list = [decode_op, random_affine_op]
  646. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  647. ... input_columns=["image"])
  648. """
  649. @check_random_affine
  650. def __init__(self, degrees, translate=None, scale=None, shear=None, resample=Inter.NEAREST, fill_value=0):
  651. # Parameter checking
  652. if shear is not None:
  653. if isinstance(shear, numbers.Number):
  654. shear = (-1 * shear, shear, 0., 0.)
  655. else:
  656. if len(shear) == 2:
  657. shear = [shear[0], shear[1], 0., 0.]
  658. elif len(shear) == 4:
  659. shear = [s for s in shear]
  660. if isinstance(degrees, numbers.Number):
  661. degrees = (-1 * degrees, degrees)
  662. if isinstance(fill_value, numbers.Number):
  663. fill_value = (fill_value, fill_value, fill_value)
  664. # translation
  665. if translate is None:
  666. translate = (0.0, 0.0, 0.0, 0.0)
  667. # scale
  668. if scale is None:
  669. scale = (1.0, 1.0)
  670. # shear
  671. if shear is None:
  672. shear = (0.0, 0.0, 0.0, 0.0)
  673. self.degrees = degrees
  674. self.translate = translate
  675. self.scale_ = scale
  676. self.shear = shear
  677. self.resample = DE_C_INTER_MODE[resample]
  678. self.fill_value = fill_value
  679. def parse(self):
  680. return cde.RandomAffineOperation(self.degrees, self.translate, self.scale_, self.shear, self.resample,
  681. self.fill_value)
  682. class RandomAutoContrast(ImageTensorOperation):
  683. """
  684. Automatically adjust the contrast of the image with a given probability.
  685. Args:
  686. cutoff (float, optional): Percent of the lightest and darkest pixels to be cut off from
  687. the histogram of the input image. The value must be in range of [0.0, 50.0) (default=0.0).
  688. ignore (Union[int, sequence], optional): The background pixel values to be ignored, each of
  689. which must be in range of [0, 255] (default=None).
  690. prob (float, optional): Probability of the image being automatically contrasted, which
  691. must be in range of [0, 1] (default=0.5).
  692. Examples:
  693. >>> transforms_list = [c_vision.Decode(), c_vision.RandomAutoContrast(cutoff=0.0, ignore=None, prob=0.5)]
  694. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  695. ... input_columns=["image"])
  696. """
  697. @check_random_auto_contrast
  698. def __init__(self, cutoff=0.0, ignore=None, prob=0.5):
  699. if ignore is None:
  700. ignore = []
  701. if isinstance(ignore, int):
  702. ignore = [ignore]
  703. self.cutoff = cutoff
  704. self.ignore = ignore
  705. self.prob = prob
  706. def parse(self):
  707. return cde.RandomAutoContrastOperation(self.cutoff, self.ignore, self.prob)
  708. class RandomColor(ImageTensorOperation):
  709. """
  710. Adjust the color of the input image by a fixed or random degree.
  711. This operation works only with 3-channel color images.
  712. Args:
  713. degrees (sequence, optional): Range of random color adjustment degrees.
  714. It should be in (min, max) format. If min=max, then it is a
  715. single fixed magnitude operation (default=(0.1, 1.9)).
  716. Examples:
  717. >>> transforms_list = [c_vision.Decode(), c_vision.RandomColor((0.5, 2.0))]
  718. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  719. ... input_columns=["image"])
  720. """
  721. @check_positive_degrees
  722. def __init__(self, degrees=(0.1, 1.9)):
  723. self.degrees = degrees
  724. def parse(self):
  725. return cde.RandomColorOperation(*self.degrees)
  726. class RandomColorAdjust(ImageTensorOperation):
  727. """
  728. Randomly adjust the brightness, contrast, saturation, and hue of the input image.
  729. Args:
  730. brightness (Union[float, list, tuple], optional): Brightness adjustment factor (default=(1, 1)).
  731. Cannot be negative.
  732. If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness].
  733. If it is a sequence, it should be [min, max] for the range.
  734. contrast (Union[float, list, tuple], optional): Contrast adjustment factor (default=(1, 1)).
  735. Cannot be negative.
  736. If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast].
  737. If it is a sequence, it should be [min, max] for the range.
  738. saturation (Union[float, list, tuple], optional): Saturation adjustment factor (default=(1, 1)).
  739. Cannot be negative.
  740. If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation].
  741. If it is a sequence, it should be [min, max] for the range.
  742. hue (Union[float, list, tuple], optional): Hue adjustment factor (default=(0, 0)).
  743. If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5.
  744. If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5.
  745. Examples:
  746. >>> decode_op = c_vision.Decode()
  747. >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1),
  748. ... contrast=(0.4, 1),
  749. ... saturation=(0.3, 1))
  750. >>> transforms_list = [decode_op, transform_op]
  751. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  752. ... input_columns=["image"])
  753. """
  754. @check_random_color_adjust
  755. def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)):
  756. brightness = self.__expand_values(brightness)
  757. contrast = self.__expand_values(contrast)
  758. saturation = self.__expand_values(saturation)
  759. hue = self.__expand_values(
  760. hue, center=0, bound=(-0.5, 0.5), non_negative=False)
  761. self.brightness = brightness
  762. self.contrast = contrast
  763. self.saturation = saturation
  764. self.hue = hue
  765. def __expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True):
  766. """Expand input value for vision adjustment factor."""
  767. if isinstance(value, numbers.Number):
  768. value = [center - value, center + value]
  769. if non_negative:
  770. value[0] = max(0, value[0])
  771. check_range(value, bound)
  772. return (value[0], value[1])
  773. def parse(self):
  774. return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue)
  775. class RandomCrop(ImageTensorOperation):
  776. """
  777. Crop the input image at a random location. If input image size is smaller than output size,
  778. input image will be padded before cropping.
  779. Note:
  780. If the input image is more than one, then make sure that the image size is the same.
  781. Args:
  782. size (Union[int, sequence]): The output size of the cropped image.
  783. If size is an integer, a square crop of size (size, size) is returned.
  784. If size is a sequence of length 2, it should be (height, width).
  785. padding (Union[int, sequence], optional): The number of pixels to pad the image (default=None).
  786. If padding is not None, pad image first with padding values.
  787. If a single number is provided, pad all borders with this value.
  788. If a tuple or lists of 2 values are provided, pad the (left and top)
  789. with the first value and (right and bottom) with the second value.
  790. If 4 values are provided as a list or tuple,
  791. pad the left, top, right and bottom respectively.
  792. pad_if_needed (bool, optional): Pad the image if either side is smaller than
  793. the given output size (default=False).
  794. fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
  795. padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  796. If it is an integer, it is used for all RGB channels.
  797. The fill_value values must be in range [0, 255] (default=0).
  798. padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of
  799. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].
  800. - Border.CONSTANT, means it fills the border with constant values.
  801. - Border.EDGE, means it pads with the last value on the edge.
  802. - Border.REFLECT, means it reflects the values on the edge omitting the last
  803. value of edge.
  804. - Border.SYMMETRIC, means it reflects the values on the edge repeating the last
  805. value of edge.
  806. Examples:
  807. >>> from mindspore.dataset.vision import Border
  808. >>> decode_op = c_vision.Decode()
  809. >>> random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=Border.EDGE)
  810. >>> transforms_list = [decode_op, random_crop_op]
  811. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  812. ... input_columns=["image"])
  813. """
  814. @check_random_crop
  815. def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
  816. if isinstance(size, int):
  817. size = (size, size)
  818. if padding is None:
  819. padding = (0, 0, 0, 0)
  820. else:
  821. padding = parse_padding(padding)
  822. if isinstance(fill_value, int):
  823. fill_value = tuple([fill_value] * 3)
  824. self.size = size
  825. self.padding = padding
  826. self.pad_if_needed = pad_if_needed
  827. self.fill_value = fill_value
  828. self.padding_mode = padding_mode.value
  829. def parse(self):
  830. border_type = DE_C_BORDER_TYPE[self.padding_mode]
  831. return cde.RandomCropOperation(self.size, self.padding, self.pad_if_needed, self.fill_value, border_type)
  832. class RandomCropDecodeResize(ImageTensorOperation):
  833. """
  834. A combination of `Crop`, `Decode` and `Resize`. It will get better performance for JPEG images. This operator
  835. will crop the input image at a random location, decode the cropped image in RGB mode, and resize the decoded image.
  836. Args:
  837. size (Union[int, sequence]): The output size of the resized image.
  838. If size is an integer, a square crop of size (size, size) is returned.
  839. If size is a sequence of length 2, it should be (height, width).
  840. scale (list, tuple, optional): Range [min, max) of respective size of the
  841. original size to be cropped (default=(0.08, 1.0)).
  842. ratio (list, tuple, optional): Range [min, max) of aspect ratio to be
  843. cropped (default=(3. / 4., 4. / 3.)).
  844. interpolation (Inter mode, optional): Image interpolation mode for resize operator(default=Inter.BILINEAR).
  845. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC].
  846. - Inter.BILINEAR, means interpolation method is bilinear interpolation.
  847. - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
  848. - Inter.BICUBIC, means interpolation method is bicubic interpolation.
  849. - Inter.AREA, means interpolation method is pixel area interpolation.
  850. - Inter.PILCUBIC, means interpolation method is bicubic interpolation like implemented in pillow, input
  851. should be in 3 channels format.
  852. max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10).
  853. If exceeded, fall back to use center_crop instead.
  854. Examples:
  855. >>> from mindspore.dataset.vision import Inter
  856. >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75),
  857. ... scale=(0.25, 0.5),
  858. ... interpolation=Inter.NEAREST,
  859. ... max_attempts=5)
  860. >>> transforms_list = [resize_crop_decode_op]
  861. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  862. ... input_columns=["image"])
  863. """
  864. @check_random_resize_crop
  865. def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
  866. interpolation=Inter.BILINEAR, max_attempts=10):
  867. if isinstance(size, int):
  868. size = (size, size)
  869. self.size = size
  870. self.scale = scale
  871. self.ratio = ratio
  872. self.interpolation = interpolation
  873. self.max_attempts = max_attempts
  874. def parse(self):
  875. return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio,
  876. DE_C_INTER_MODE[self.interpolation],
  877. self.max_attempts)
  878. def __call__(self, img):
  879. if not isinstance(img, np.ndarray):
  880. raise TypeError(
  881. "Input should be an encoded image in 1-D NumPy format, got {}.".format(type(img)))
  882. if img.ndim != 1 or img.dtype.type is not np.uint8:
  883. raise TypeError("Input should be an encoded image with uint8 type in 1-D NumPy format, " +
  884. "got format:{}, dtype:{}.".format(type(img), img.dtype.type))
  885. return super().__call__(img)
  886. class RandomCropWithBBox(ImageTensorOperation):
  887. """
  888. Crop the input image at a random location and adjust bounding boxes accordingly.
  889. Args:
  890. size (Union[int, sequence]): The output size of the cropped image.
  891. If size is an integer, a square crop of size (size, size) is returned.
  892. If size is a sequence of length 2, it should be (height, width).
  893. padding (Union[int, sequence], optional): The number of pixels to pad the image (default=None).
  894. If padding is not None, first pad image with padding values.
  895. If a single number is provided, pad all borders with this value.
  896. If a tuple or lists of 2 values are provided, pad the (left and top)
  897. with the first value and (right and bottom) with the second value.
  898. If 4 values are provided as a list or tuple, pad the left, top, right and bottom respectively.
  899. pad_if_needed (bool, optional): Pad the image if either side is smaller than
  900. the given output size (default=False).
  901. fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for
  902. padding_mode Border.CONSTANT. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  903. If it is an integer, it is used for all RGB channels.
  904. The fill_value values must be in range [0, 255] (default=0).
  905. padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). It can be any of
  906. [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].
  907. - Border.CONSTANT, means it fills the border with constant values.
  908. - Border.EDGE, means it pads with the last value on the edge.
  909. - Border.REFLECT, means it reflects the values on the edge omitting the last
  910. value of edge.
  911. - Border.SYMMETRIC, means it reflects the values on the edge repeating the last
  912. value of edge.
  913. Examples:
  914. >>> decode_op = c_vision.Decode()
  915. >>> random_crop_with_bbox_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200])
  916. >>> transforms_list = [decode_op, random_crop_with_bbox_op]
  917. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  918. ... input_columns=["image"])
  919. """
  920. @check_random_crop
  921. def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
  922. if isinstance(size, int):
  923. size = (size, size)
  924. if padding is None:
  925. padding = (0, 0, 0, 0)
  926. else:
  927. padding = parse_padding(padding)
  928. if isinstance(fill_value, int):
  929. fill_value = tuple([fill_value] * 3)
  930. self.size = size
  931. self.padding = padding
  932. self.pad_if_needed = pad_if_needed
  933. self.fill_value = fill_value
  934. self.padding_mode = padding_mode.value
  935. def parse(self):
  936. border_type = DE_C_BORDER_TYPE[self.padding_mode]
  937. return cde.RandomCropWithBBoxOperation(self.size, self.padding, self.pad_if_needed, self.fill_value,
  938. border_type)
  939. class RandomEqualize(ImageTensorOperation):
  940. """
  941. Apply histogram equalization on the input image with a given probability.
  942. Args:
  943. prob (float, optional): Probability of the image being equalized, which
  944. must be in range of [0, 1] (default=0.5).
  945. Examples:
  946. >>> transforms_list = [c_vision.Decode(), c_vision.RandomEqualize(0.5)]
  947. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  948. ... input_columns=["image"])
  949. """
  950. @check_prob
  951. def __init__(self, prob=0.5):
  952. self.prob = prob
  953. def parse(self):
  954. return cde.RandomEqualizeOperation(self.prob)
  955. class RandomHorizontalFlip(ImageTensorOperation):
  956. """
  957. Randomly flip the input image horizontally with a given probability.
  958. Args:
  959. prob (float, optional): Probability of the image being flipped (default=0.5).
  960. Examples:
  961. >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip(0.75)]
  962. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  963. ... input_columns=["image"])
  964. """
  965. @check_prob
  966. def __init__(self, prob=0.5):
  967. self.prob = prob
  968. def parse(self):
  969. return cde.RandomHorizontalFlipOperation(self.prob)
  970. class RandomHorizontalFlipWithBBox(ImageTensorOperation):
  971. """
  972. Flip the input image horizontally randomly with a given probability and adjust bounding boxes accordingly.
  973. Args:
  974. prob (float, optional): Probability of the image being flipped (default=0.5).
  975. Examples:
  976. >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlipWithBBox(0.70)]
  977. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  978. ... input_columns=["image"])
  979. """
  980. @check_prob
  981. def __init__(self, prob=0.5):
  982. self.prob = prob
  983. def parse(self):
  984. return cde.RandomHorizontalFlipWithBBoxOperation(self.prob)
  985. class RandomInvert(ImageTensorOperation):
  986. """
  987. Randomly invert the colors of image with a given probability.
  988. Args:
  989. prob (float, optional): Probability of the image being inverted, which must be in range of [0, 1] (default=0.5).
  990. Examples:
  991. >>> transforms_list = [c_vision.Decode(), c_vision.RandomInvert(0.5)]
  992. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  993. ... input_columns=["image"])
  994. """
  995. @check_prob
  996. def __init__(self, prob=0.5):
  997. self.prob = prob
  998. def parse(self):
  999. return cde.RandomInvertOperation(self.prob)
  1000. class RandomLighting(ImageTensorOperation):
  1001. """
  1002. Add AlexNet-style PCA-based noise to an image. The eigenvalue and eigenvectors for Alexnet's PCA noise is
  1003. calculated from the imagenet dataset.
  1004. Args:
  1005. alpha (float, optional): Intensity of the image (default=0.05).
  1006. Examples:
  1007. >>> transforms_list = [c_vision.Decode(), c_vision.RandomLighting(0.1)]
  1008. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1009. ... input_columns=["image"])
  1010. """
  1011. @check_alpha
  1012. def __init__(self, alpha=0.05):
  1013. self.alpha = alpha
  1014. def parse(self):
  1015. return cde.RandomLightingOperation(self.alpha)
  1016. class RandomPosterize(ImageTensorOperation):
  1017. """
  1018. Reduce the number of bits for each color channel to posterize the input image randomly with a given probability.
  1019. Args:
  1020. bits (sequence or int, optional): Range of random posterize to compress image.
  1021. Bits values must be in range of [1,8], and include at
  1022. least one integer value in the given range. It must be in
  1023. (min, max) or integer format. If min=max, then it is a single fixed
  1024. magnitude operation (default=(8, 8)).
  1025. Examples:
  1026. >>> transforms_list = [c_vision.Decode(), c_vision.RandomPosterize((6, 8))]
  1027. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1028. ... input_columns=["image"])
  1029. """
  1030. @check_posterize
  1031. def __init__(self, bits=(8, 8)):
  1032. self.bits = bits
  1033. def parse(self):
  1034. bits = self.bits
  1035. if isinstance(bits, int):
  1036. bits = (bits, bits)
  1037. return cde.RandomPosterizeOperation(bits)
  1038. class RandomResizedCrop(ImageTensorOperation):
  1039. """
  1040. Crop the input image to a random size and aspect ratio. This operator will crop the input image randomly, and
  1041. resize the cropped image using a selected interpolation mode.
  1042. Note:
  1043. If the input image is more than one, then make sure that the image size is the same.
  1044. Args:
  1045. size (Union[int, sequence]): The output size of the resized image.
  1046. If size is an integer, a square crop of size (size, size) is returned.
  1047. If size is a sequence of length 2, it should be (height, width).
  1048. scale (list, tuple, optional): Range [min, max) of respective size of the original
  1049. size to be cropped (default=(0.08, 1.0)).
  1050. ratio (list, tuple, optional): Range [min, max) of aspect ratio to be cropped
  1051. (default=(3. / 4., 4. / 3.)).
  1052. interpolation (Inter mode, optional): Image interpolation mode for resize operator (default=Inter.BILINEAR).
  1053. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC].
  1054. - Inter.BILINEAR, means interpolation method is bilinear interpolation.
  1055. - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
  1056. - Inter.BICUBIC, means interpolation method is bicubic interpolation.
  1057. - Inter.AREA, means interpolation method is pixel area interpolation.
  1058. - Inter.PILCUBIC, means interpolation method is bicubic interpolation like implemented in pillow, input
  1059. should be in 3 channels format.
  1060. max_attempts (int, optional): The maximum number of attempts to propose a valid
  1061. crop_area (default=10). If exceeded, fall back to use center_crop instead.
  1062. Examples:
  1063. >>> from mindspore.dataset.vision import Inter
  1064. >>> decode_op = c_vision.Decode()
  1065. >>> resize_crop_op = c_vision.RandomResizedCrop(size=(50, 75), scale=(0.25, 0.5),
  1066. ... interpolation=Inter.BILINEAR)
  1067. >>> transforms_list = [decode_op, resize_crop_op]
  1068. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1069. ... input_columns=["image"])
  1070. """
  1071. @check_random_resize_crop
  1072. def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
  1073. interpolation=Inter.BILINEAR, max_attempts=10):
  1074. if isinstance(size, int):
  1075. size = (size, size)
  1076. self.size = size
  1077. self.scale = scale
  1078. self.ratio = ratio
  1079. self.interpolation = interpolation
  1080. self.max_attempts = max_attempts
  1081. def parse(self):
  1082. return cde.RandomResizedCropOperation(self.size, self.scale, self.ratio, DE_C_INTER_MODE[self.interpolation],
  1083. self.max_attempts)
  1084. class RandomResizedCropWithBBox(ImageTensorOperation):
  1085. """
  1086. Crop the input image to a random size and aspect ratio and adjust bounding boxes accordingly.
  1087. Args:
  1088. size (Union[int, sequence]): The size of the output image.
  1089. If size is an integer, a square crop of size (size, size) is returned.
  1090. If size is a sequence of length 2, it should be (height, width).
  1091. scale (list, tuple, optional): Range (min, max) of respective size of the original
  1092. size to be cropped (default=(0.08, 1.0)).
  1093. ratio (list, tuple, optional): Range (min, max) of aspect ratio to be cropped
  1094. (default=(3. / 4., 4. / 3.)).
  1095. interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
  1096. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
  1097. - Inter.BILINEAR, means interpolation method is bilinear interpolation.
  1098. - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
  1099. - Inter.BICUBIC, means interpolation method is bicubic interpolation.
  1100. max_attempts (int, optional): The maximum number of attempts to propose a valid
  1101. crop area (default=10). If exceeded, fall back to use center crop instead.
  1102. Examples:
  1103. >>> from mindspore.dataset.vision import Inter
  1104. >>> decode_op = c_vision.Decode()
  1105. >>> bbox_op = c_vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST)
  1106. >>> transforms_list = [decode_op, bbox_op]
  1107. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1108. ... input_columns=["image"])
  1109. """
  1110. @check_random_resize_crop
  1111. def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
  1112. interpolation=Inter.BILINEAR, max_attempts=10):
  1113. if isinstance(size, int):
  1114. size = (size, size)
  1115. self.size = size
  1116. self.scale = scale
  1117. self.ratio = ratio
  1118. self.interpolation = interpolation
  1119. self.max_attempts = max_attempts
  1120. def parse(self):
  1121. return cde.RandomResizedCropWithBBoxOperation(self.size, self.scale, self.ratio,
  1122. DE_C_INTER_MODE[self.interpolation], self.max_attempts)
  1123. class RandomResize(ImageTensorOperation):
  1124. """
  1125. Resize the input image using a randomly selected interpolation mode.
  1126. Args:
  1127. size (Union[int, sequence]): The output size of the resized image.
  1128. If size is an integer, smaller edge of the image will be resized to this value with
  1129. the same image aspect ratio.
  1130. If size is a sequence of length 2, it should be (height, width).
  1131. Examples:
  1132. >>> # randomly resize image, keeping aspect ratio
  1133. >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResize(50)]
  1134. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
  1135. ... input_columns=["image"])
  1136. >>> # randomly resize image to landscape style
  1137. >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResize((40, 60))]
  1138. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
  1139. ... input_columns=["image"])
  1140. """
  1141. @check_resize
  1142. def __init__(self, size):
  1143. self.size = size
  1144. def parse(self):
  1145. size = self.size
  1146. if isinstance(size, int):
  1147. size = (size,)
  1148. return cde.RandomResizeOperation(size)
  1149. class RandomResizeWithBBox(ImageTensorOperation):
  1150. """
  1151. Tensor operation to resize the input image using a randomly selected interpolation mode and adjust
  1152. bounding boxes accordingly.
  1153. Args:
  1154. size (Union[int, sequence]): The output size of the resized image.
  1155. If size is an integer, smaller edge of the image will be resized to this value with
  1156. the same image aspect ratio.
  1157. If size is a sequence of length 2, it should be (height, width).
  1158. Examples:
  1159. >>> # randomly resize image with bounding boxes, keeping aspect ratio
  1160. >>> transforms_list1 = [c_vision.Decode(), c_vision.RandomResizeWithBBox(60)]
  1161. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
  1162. ... input_columns=["image"])
  1163. >>> # randomly resize image with bounding boxes to portrait style
  1164. >>> transforms_list2 = [c_vision.Decode(), c_vision.RandomResizeWithBBox((80, 60))]
  1165. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
  1166. ... input_columns=["image"])
  1167. """
  1168. @check_resize
  1169. def __init__(self, size):
  1170. self.size = size
  1171. def parse(self):
  1172. size = self.size
  1173. if isinstance(size, int):
  1174. size = (size,)
  1175. return cde.RandomResizeWithBBoxOperation(size)
  1176. class RandomRotation(ImageTensorOperation):
  1177. """
  1178. Rotate the input image randomly within a specified range of degrees.
  1179. Args:
  1180. degrees (Union[int, float, sequence]): Range of random rotation degrees.
  1181. If `degrees` is a number, the range will be converted to (-degrees, degrees).
  1182. If `degrees` is a sequence, it should be (min, max).
  1183. resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
  1184. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
  1185. - Inter.BILINEAR, means resample method is bilinear interpolation.
  1186. - Inter.NEAREST, means resample method is nearest-neighbor interpolation.
  1187. - Inter.BICUBIC, means resample method is bicubic interpolation.
  1188. expand (bool, optional): Optional expansion flag (default=False). If set to True, expand the output
  1189. image to make it large enough to hold the entire rotated image.
  1190. If set to False or omitted, make the output image the same size as the input.
  1191. Note that the expand flag assumes rotation around the center and no translation.
  1192. center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None).
  1193. Origin is the top left corner. None sets to the center of the image.
  1194. fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image.
  1195. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  1196. If it is an integer, it is used for all RGB channels.
  1197. The fill_value values must be in range [0, 255] (default=0).
  1198. Examples:
  1199. >>> from mindspore.dataset.vision import Inter
  1200. >>> transforms_list = [c_vision.Decode(),
  1201. ... c_vision.RandomRotation(degrees=5.0,
  1202. ... resample=Inter.NEAREST,
  1203. ... expand=True)]
  1204. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1205. ... input_columns=["image"])
  1206. """
  1207. @check_random_rotation
  1208. def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
  1209. if isinstance(degrees, (int, float)):
  1210. degrees = degrees % 360
  1211. degrees = [-degrees, degrees]
  1212. elif isinstance(degrees, (list, tuple)):
  1213. if degrees[1] - degrees[0] >= 360:
  1214. degrees = [-180, 180]
  1215. else:
  1216. degrees = [degrees[0] % 360, degrees[1] % 360]
  1217. if degrees[0] > degrees[1]:
  1218. degrees[1] += 360
  1219. if center is None:
  1220. center = ()
  1221. if isinstance(fill_value, int):
  1222. fill_value = tuple([fill_value] * 3)
  1223. self.degrees = degrees
  1224. self.resample = resample
  1225. self.expand = expand
  1226. self.center = center
  1227. self.fill_value = fill_value
  1228. def parse(self):
  1229. return cde.RandomRotationOperation(self.degrees, DE_C_INTER_MODE[self.resample], self.expand, self.center,
  1230. self.fill_value)
  1231. class RandomSelectSubpolicy(ImageTensorOperation):
  1232. """
  1233. Choose a random sub-policy from a policy list to be applied on the input image.
  1234. Args:
  1235. policy (list(list(tuple(TensorOp, prob (float)))): List of sub-policies to choose from.
  1236. A sub-policy is a list of tuples (op, prob), where op is a TensorOp operation and prob is the probability
  1237. that this op will be applied, and the prob values must be in range [0, 1]. Once a sub-policy is selected,
  1238. each op within the sub-policy with be applied in sequence according to its probability.
  1239. Examples:
  1240. >>> policy = [[(c_vision.RandomRotation((45, 45)), 0.5),
  1241. ... (c_vision.RandomVerticalFlip(), 1),
  1242. ... (c_vision.RandomColorAdjust(), 0.8)],
  1243. ... [(c_vision.RandomRotation((90, 90)), 1),
  1244. ... (c_vision.RandomColorAdjust(), 0.2)]]
  1245. >>> image_folder_dataset = image_folder_dataset.map(operations=c_vision.RandomSelectSubpolicy(policy),
  1246. ... input_columns=["image"])
  1247. """
  1248. @check_random_select_subpolicy_op
  1249. def __init__(self, policy):
  1250. self.policy = policy
  1251. def parse(self):
  1252. policy = []
  1253. for list_one in self.policy:
  1254. policy_one = []
  1255. for list_two in list_one:
  1256. if list_two[0] and getattr(list_two[0], 'parse', None):
  1257. policy_one.append((list_two[0].parse(), list_two[1]))
  1258. else:
  1259. policy_one.append((list_two[0], list_two[1]))
  1260. policy.append(policy_one)
  1261. return cde.RandomSelectSubpolicyOperation(policy)
  1262. class RandomSharpness(ImageTensorOperation):
  1263. """
  1264. Adjust the sharpness of the input image by a fixed or random degree. Degree of 0.0 gives a blurred image,
  1265. degree of 1.0 gives the original image, and degree of 2.0 gives a sharpened image.
  1266. Args:
  1267. degrees (Union[list, tuple], optional): Range of random sharpness adjustment degrees. It should be in
  1268. (min, max) format. If min=max, then it is a single fixed magnitude operation (default = (0.1, 1.9)).
  1269. Raises:
  1270. TypeError : If `degrees` is not a list or tuple.
  1271. ValueError: If `degrees` is negative.
  1272. ValueError: If `degrees` is in (max, min) format instead of (min, max).
  1273. Examples:
  1274. >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))]
  1275. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1276. ... input_columns=["image"])
  1277. """
  1278. @check_positive_degrees
  1279. def __init__(self, degrees=(0.1, 1.9)):
  1280. self.degrees = degrees
  1281. def parse(self):
  1282. return cde.RandomSharpnessOperation(self.degrees)
  1283. class RandomSolarize(ImageTensorOperation):
  1284. """
  1285. Randomly selects a subrange within the specified threshold range and sets the pixel value within
  1286. the subrange to (255 - pixel).
  1287. Args:
  1288. threshold (tuple, optional): Range of random solarize threshold (default=(0, 255)).
  1289. Threshold values should always be in (min, max) format,
  1290. where min and max are integers in the range (0, 255), and min <= max.
  1291. If min=max, then invert all pixel values above min(max).
  1292. Examples:
  1293. >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))]
  1294. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1295. ... input_columns=["image"])
  1296. """
  1297. @check_random_solarize
  1298. def __init__(self, threshold=(0, 255)):
  1299. self.threshold = threshold
  1300. def parse(self):
  1301. return cde.RandomSolarizeOperation(self.threshold)
  1302. class RandomVerticalFlip(ImageTensorOperation):
  1303. """
  1304. Randomly flip the input image vertically with a given probability.
  1305. Args:
  1306. prob (float, optional): Probability of the image being flipped (default=0.5).
  1307. Examples:
  1308. >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)]
  1309. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1310. ... input_columns=["image"])
  1311. """
  1312. @check_prob
  1313. def __init__(self, prob=0.5):
  1314. self.prob = prob
  1315. def parse(self):
  1316. return cde.RandomVerticalFlipOperation(self.prob)
  1317. class RandomVerticalFlipWithBBox(ImageTensorOperation):
  1318. """
  1319. Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly.
  1320. Args:
  1321. prob (float, optional): Probability of the image being flipped (default=0.5).
  1322. Examples:
  1323. >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlipWithBBox(0.20)]
  1324. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1325. ... input_columns=["image"])
  1326. """
  1327. @check_prob
  1328. def __init__(self, prob=0.5):
  1329. self.prob = prob
  1330. def parse(self):
  1331. return cde.RandomVerticalFlipWithBBoxOperation(self.prob)
  1332. class Rescale(ImageTensorOperation):
  1333. """
  1334. Rescale the input image with the given rescale and shift. This operator will rescale the input image
  1335. with: output = image * rescale + shift.
  1336. Args:
  1337. rescale (float): Rescale factor.
  1338. shift (float): Shift factor.
  1339. Examples:
  1340. >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)]
  1341. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1342. ... input_columns=["image"])
  1343. """
  1344. @check_rescale
  1345. def __init__(self, rescale, shift):
  1346. self.rescale = rescale
  1347. self.shift = shift
  1348. def parse(self):
  1349. return cde.RescaleOperation(self.rescale, self.shift)
  1350. class Resize(ImageTensorOperation):
  1351. """
  1352. Resize the input image to the given size with a given interpolation mode.
  1353. Args:
  1354. size (Union[int, sequence]): The output size of the resized image.
  1355. If size is an integer, the smaller edge of the image will be resized to this value with
  1356. the same image aspect ratio.
  1357. If size is a sequence of length 2, it should be (height, width).
  1358. interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR).
  1359. It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC].
  1360. - Inter.LINEAR, means interpolation method is bilinear interpolation.
  1361. - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
  1362. - Inter.BICUBIC, means interpolation method is bicubic interpolation.
  1363. - Inter.AREA, means interpolation method is pixel area interpolation.
  1364. - Inter.PILCUBIC, means interpolation method is bicubic interpolation like implemented in pillow, input
  1365. should be in 3 channels format.
  1366. Examples:
  1367. >>> from mindspore.dataset.vision import Inter
  1368. >>> decode_op = c_vision.Decode()
  1369. >>> resize_op = c_vision.Resize([100, 75], Inter.BICUBIC)
  1370. >>> transforms_list = [decode_op, resize_op]
  1371. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1372. ... input_columns=["image"])
  1373. """
  1374. @check_resize_interpolation
  1375. def __init__(self, size, interpolation=Inter.LINEAR):
  1376. if isinstance(size, int):
  1377. size = (size,)
  1378. self.size = size
  1379. self.interpolation = interpolation
  1380. def parse(self):
  1381. return cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation])
  1382. class ResizeWithBBox(ImageTensorOperation):
  1383. """
  1384. Resize the input image to the given size and adjust bounding boxes accordingly.
  1385. Args:
  1386. size (Union[int, sequence]): The output size of the resized image.
  1387. If size is an integer, smaller edge of the image will be resized to this value with
  1388. the same image aspect ratio.
  1389. If size is a sequence of length 2, it should be (height, width).
  1390. interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR).
  1391. It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC].
  1392. - Inter.LINEAR, means interpolation method is bilinear interpolation.
  1393. - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
  1394. - Inter.BICUBIC, means interpolation method is bicubic interpolation.
  1395. Examples:
  1396. >>> from mindspore.dataset.vision import Inter
  1397. >>> decode_op = c_vision.Decode()
  1398. >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST)
  1399. >>> transforms_list = [decode_op, bbox_op]
  1400. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1401. ... input_columns=["image"])
  1402. """
  1403. @check_resize_interpolation
  1404. def __init__(self, size, interpolation=Inter.LINEAR):
  1405. self.size = size
  1406. self.interpolation = interpolation
  1407. def parse(self):
  1408. size = self.size
  1409. if isinstance(size, int):
  1410. size = (size,)
  1411. return cde.ResizeWithBBoxOperation(size, DE_C_INTER_MODE[self.interpolation])
  1412. class RgbToBgr(ImageTensorOperation):
  1413. """
  1414. Convert RGB image to BGR.
  1415. Examples:
  1416. >>> from mindspore.dataset.vision import Inter
  1417. >>>
  1418. >>> decode_op = c_vision.Decode()
  1419. >>> rgb2bgr_op = c_vision.RgbToBgr()
  1420. >>> transforms_list = [decode_op, rgb2bgr_op]
  1421. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1422. ... input_columns=["image"])
  1423. """
  1424. def parse(self):
  1425. return cde.RgbToBgrOperation()
  1426. class Rotate(ImageTensorOperation):
  1427. """
  1428. Rotate the input image by specified degrees.
  1429. Args:
  1430. degrees (Union[int, float]): Rotation degrees.
  1431. resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
  1432. It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
  1433. - Inter.BILINEAR, means resample method is bilinear interpolation.
  1434. - Inter.NEAREST, means resample method is nearest-neighbor interpolation.
  1435. - Inter.BICUBIC, means resample method is bicubic interpolation.
  1436. expand (bool, optional): Optional expansion flag (default=False). If set to True, expand the output
  1437. image to make it large enough to hold the entire rotated image.
  1438. If set to False or omitted, make the output image the same size as the input.
  1439. Note that the expand flag assumes rotation around the center and no translation.
  1440. center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None).
  1441. Origin is the top left corner. None sets to the center of the image.
  1442. fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image.
  1443. If it is a 3-tuple, it is used to fill R, G, B channels respectively.
  1444. If it is an integer, it is used for all RGB channels.
  1445. The fill_value values must be in range [0, 255] (default=0).
  1446. Examples:
  1447. >>> from mindspore.dataset.vision import Inter
  1448. >>> transforms_list = [c_vision.Decode(),
  1449. ... c_vision.Rotate(degrees=30.0,
  1450. ... resample=Inter.NEAREST,
  1451. ... expand=True)]
  1452. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1453. ... input_columns=["image"])
  1454. """
  1455. @check_rotate
  1456. def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0):
  1457. if isinstance(degrees, (int, float)):
  1458. degrees = degrees % 360
  1459. if center is None:
  1460. center = ()
  1461. if isinstance(fill_value, int):
  1462. fill_value = tuple([fill_value] * 3)
  1463. self.degrees = degrees
  1464. self.resample = resample
  1465. self.expand = expand
  1466. self.center = center
  1467. self.fill_value = fill_value
  1468. def parse(self):
  1469. return cde.RotateOperation(self.degrees, DE_C_INTER_MODE[self.resample], self.expand, self.center,
  1470. self.fill_value)
  1471. class SlicePatches(ImageTensorOperation):
  1472. """
  1473. Slice Tensor to multiple patches in horizontal and vertical directions.
  1474. The usage scenario is suitable to large height and width Tensor. The Tensor
  1475. will keep the same if set both num_height and num_width to 1. And the
  1476. number of output tensors is equal to num_height*num_width.
  1477. Args:
  1478. num_height (int, optional): The number of patches in vertical direction (default=1).
  1479. num_width (int, optional): The number of patches in horizontal direction (default=1).
  1480. slice_mode (Inter mode, optional): A mode represents pad or drop (default=SliceMode.PAD).
  1481. It can be any of [SliceMode.PAD, SliceMode.DROP].
  1482. fill_value (int, optional): The border width in number of pixels in
  1483. right and bottom direction if slice_mode is set to be SliceMode.PAD (default=0).
  1484. Examples:
  1485. >>> # default padding mode
  1486. >>> decode_op = c_vision.Decode()
  1487. >>> num_h, num_w = (1, 4)
  1488. >>> slice_patches_op = c_vision.SlicePatches(num_h, num_w)
  1489. >>> transforms_list = [decode_op, slice_patches_op]
  1490. >>> cols = ['img' + str(x) for x in range(num_h*num_w)]
  1491. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1492. ... input_columns=["image"],
  1493. ... output_columns=cols, column_order=cols)
  1494. """
  1495. @check_slice_patches
  1496. def __init__(self, num_height=1, num_width=1, slice_mode=SliceMode.PAD, fill_value=0):
  1497. self.num_height = num_height
  1498. self.num_width = num_width
  1499. self.slice_mode = slice_mode
  1500. self.fill_value = fill_value
  1501. def parse(self):
  1502. return cde.SlicePatchesOperation(self.num_height, self.num_width,
  1503. DE_C_SLICE_MODE[self.slice_mode], self.fill_value)
  1504. class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation):
  1505. """
  1506. A combination of `Crop`, `Decode` and `Resize` using the simulation algorithm of Ascend series chip DVPP module.
  1507. The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
  1508. The input image size should be in range [32*32, 8192*8192].
  1509. The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16].
  1510. Only images with an even resolution can be output. The output of odd resolution is not supported.
  1511. Args:
  1512. size (Union[int, sequence]): The size of the output image.
  1513. If size is an integer, a square crop of size (size, size) is returned.
  1514. If size is a sequence of length 2, it should be (height, width).
  1515. scale (list, tuple, optional): Range [min, max) of respective size of the
  1516. original size to be cropped (default=(0.08, 1.0)).
  1517. ratio (list, tuple, optional): Range [min, max) of aspect ratio to be
  1518. cropped (default=(3. / 4., 4. / 3.)).
  1519. max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10).
  1520. If exceeded, fall back to use center_crop instead.
  1521. Examples:
  1522. >>> # decode, randomly crop and resize image, keeping aspect ratio
  1523. >>> transforms_list1 = [c_vision.SoftDvppDecodeRandomCropResizeJpeg(90)]
  1524. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
  1525. ... input_columns=["image"])
  1526. >>> # decode, randomly crop and resize to landscape style
  1527. >>> transforms_list2 = [c_vision.SoftDvppDecodeRandomCropResizeJpeg((80, 100))]
  1528. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
  1529. ... input_columns=["image"])
  1530. """
  1531. @check_soft_dvpp_decode_random_crop_resize_jpeg
  1532. def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), max_attempts=10):
  1533. if isinstance(size, int):
  1534. size = (size, size)
  1535. self.size = size
  1536. self.scale = scale
  1537. self.ratio = ratio
  1538. self.max_attempts = max_attempts
  1539. def parse(self):
  1540. return cde.SoftDvppDecodeRandomCropResizeJpegOperation(self.size, self.scale, self.ratio, self.max_attempts)
  1541. class SoftDvppDecodeResizeJpeg(ImageTensorOperation):
  1542. """
  1543. Decode and resize JPEG image using the simulation algorithm of Ascend series chip DVPP module.
  1544. It is recommended to use this algorithm in the following scenarios:
  1545. When training, the DVPP of the Ascend chip is not used,
  1546. and the DVPP of the Ascend chip is used during inference,
  1547. and the accuracy of inference is lower than the accuracy of training;
  1548. and the input image size should be in range [32*32, 8192*8192].
  1549. The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16].
  1550. Only images with an even resolution can be output. The output of odd resolution is not supported.
  1551. Args:
  1552. size (Union[int, sequence]): The output size of the resized image.
  1553. If size is an integer, smaller edge of the image will be resized to this value with
  1554. the same image aspect ratio.
  1555. If size is a sequence of length 2, it should be (height, width).
  1556. Examples:
  1557. >>> # decode and resize image, keeping aspect ratio
  1558. >>> transforms_list1 = [c_vision.SoftDvppDecodeResizeJpeg(70)]
  1559. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
  1560. ... input_columns=["image"])
  1561. >>> # decode and resize to portrait style
  1562. >>> transforms_list2 = [c_vision.SoftDvppDecodeResizeJpeg((80, 60))]
  1563. >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
  1564. ... input_columns=["image"])
  1565. """
  1566. @check_resize
  1567. def __init__(self, size):
  1568. if isinstance(size, int):
  1569. size = (size,)
  1570. self.size = size
  1571. def parse(self):
  1572. return cde.SoftDvppDecodeResizeJpegOperation(self.size)
  1573. class UniformAugment(ImageTensorOperation):
  1574. """
  1575. Perform randomly selected augmentation on input image.
  1576. Args:
  1577. transforms: List of C++ operations (Python operations are not accepted).
  1578. num_ops (int, optional): Number of operations to be selected and applied (default=2).
  1579. Examples:
  1580. >>> import mindspore.dataset.vision.py_transforms as py_vision
  1581. >>> transforms_list = [c_vision.RandomHorizontalFlip(),
  1582. ... c_vision.RandomVerticalFlip(),
  1583. ... c_vision.RandomColorAdjust(),
  1584. ... c_vision.RandomRotation(degrees=45)]
  1585. >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2)
  1586. >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]),
  1587. ... uni_aug_op]
  1588. >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all,
  1589. ... input_columns="image",
  1590. ... num_parallel_workers=1)
  1591. """
  1592. @check_uniform_augment_cpp
  1593. def __init__(self, transforms, num_ops=2):
  1594. self.transforms = transforms
  1595. self.num_ops = num_ops
  1596. def parse(self):
  1597. transforms = []
  1598. for op in self.transforms:
  1599. if op and getattr(op, 'parse', None):
  1600. transforms.append(op.parse())
  1601. else:
  1602. transforms.append(op)
  1603. return cde.UniformAugOperation(transforms, self.num_ops)
  1604. class VerticalFlip(ImageTensorOperation):
  1605. """
  1606. Flip the input image vertically.
  1607. Examples:
  1608. >>> transforms_list = [c_vision.Decode(), c_vision.VerticalFlip()]
  1609. >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
  1610. ... input_columns=["image"])
  1611. """
  1612. def parse(self):
  1613. return cde.VerticalFlipOperation()