You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

vision.h 29 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  18. #include <map>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "include/api/status.h"
  24. #include "minddata/dataset/include/constants.h"
  25. #include "minddata/dataset/include/transforms.h"
  26. #include "minddata/dataset/include/vision_lite.h"
  27. namespace mindspore {
  28. namespace dataset {
  29. // Transform operations for performing computer vision.
  30. namespace vision {
  31. // Transform Op classes (in alphabetical order)
  32. class AutoContrastOperation;
  33. class BoundingBoxAugmentOperation;
  34. class CutMixBatchOperation;
  35. class CutOutOperation;
  36. class EqualizeOperation;
  37. class HwcToChwOperation;
  38. class InvertOperation;
  39. class MixUpBatchOperation;
  40. class NormalizePadOperation;
  41. class PadOperation;
  42. class RandomAffineOperation;
  43. class RandomColorOperation;
  44. class RandomColorAdjustOperation;
  45. class RandomCropOperation;
  46. class RandomCropDecodeResizeOperation;
  47. class RandomCropWithBBoxOperation;
  48. class RandomHorizontalFlipOperation;
  49. class RandomHorizontalFlipWithBBoxOperation;
  50. class RandomPosterizeOperation;
  51. class RandomResizeOperation;
  52. class RandomResizeWithBBoxOperation;
  53. class RandomResizedCropOperation;
  54. class RandomResizedCropWithBBoxOperation;
  55. class RandomRotationOperation;
  56. class RandomSelectSubpolicyOperation;
  57. class RandomSharpnessOperation;
  58. class RandomSolarizeOperation;
  59. class RandomVerticalFlipOperation;
  60. class RandomVerticalFlipWithBBoxOperation;
  61. class RescaleOperation;
  62. class ResizeWithBBoxOperation;
  63. class RgbaToBgrOperation;
  64. class RgbaToRgbOperation;
  65. class SoftDvppDecodeRandomCropResizeJpegOperation;
  66. class SoftDvppDecodeResizeJpegOperation;
  67. class SwapRedBlueOperation;
  68. class UniformAugOperation;
  69. /// \brief Function to create an AutoContrast TensorOperation.
  70. /// \notes Apply automatic contrast on input image.
  71. /// \param[in] cutoff Percent of pixels to cut off from the histogram, the valid range of cutoff value is 0 to 100.
  72. /// \param[in] ignore Pixel values to ignore.
  73. /// \return Shared pointer to the current TensorOperation.
  74. std::shared_ptr<AutoContrastOperation> AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});
  75. /// \brief Function to create a BoundingBoxAugment TensorOperation.
  76. /// \notes Apply a given image transform on a random selection of bounding box regions of a given image.
  77. /// \param[in] transform A TensorOperation transform.
  78. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  79. /// \return Shared pointer to the current TensorOperation.
  80. std::shared_ptr<BoundingBoxAugmentOperation> BoundingBoxAugment(std::shared_ptr<TensorOperation> transform,
  81. float ratio = 0.3);
  82. /// \brief Function to apply CutMix on a batch of images
  83. /// \notes Masks a random section of each image with the corresponding part of another randomly
  84. /// selected image in that batch
  85. /// \param[in] image_batch_format The format of the batch
  86. /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0)
  87. /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0)
  88. /// \return Shared pointer to the current TensorOp
  89. std::shared_ptr<CutMixBatchOperation> CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0,
  90. float prob = 1.0);
  91. /// \brief Function to create a CutOut TensorOp
  92. /// \notes Randomly cut (mask) out a given number of square patches from the input image
  93. /// \param[in] length Integer representing the side length of each square patch
  94. /// \param[in] num_patches Integer representing the number of patches to be cut out of an image
  95. /// \return Shared pointer to the current TensorOp
  96. std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1);
  97. /// \brief Function to create an Equalize TensorOperation.
  98. /// \notes Apply histogram equalization on input image.
  99. /// \return Shared pointer to the current TensorOperation.
  100. std::shared_ptr<EqualizeOperation> Equalize();
  101. /// \brief Function to create a HwcToChw TensorOperation.
  102. /// \notes Transpose the input image; shape (H, W, C) to shape (C, H, W).
  103. /// \return Shared pointer to the current TensorOperation.
  104. std::shared_ptr<HwcToChwOperation> HWC2CHW();
  105. /// \brief Function to create an Invert TensorOperation.
  106. /// \notes Apply invert on input image in RGB mode.
  107. /// \return Shared pointer to the current TensorOperation.
  108. std::shared_ptr<InvertOperation> Invert();
  109. /// \brief Function to create a MixUpBatch TensorOperation.
  110. /// \notes Apply MixUp transformation on an input batch of images and labels. The labels must be in
  111. /// one-hot format and Batch must be called before calling this function.
  112. /// \param[in] alpha hyperparameter of beta distribution (default = 1.0)
  113. /// \return Shared pointer to the current TensorOperation.
  114. std::shared_ptr<MixUpBatchOperation> MixUpBatch(float alpha = 1);
  115. /// \brief Function to create a NormalizePad TensorOperation.
  116. /// \notes Normalize the input image with respect to mean and standard deviation and pad an extra
  117. /// channel with value zero.
  118. /// \param[in] mean A vector of mean values for each channel, w.r.t. channel order.
  119. /// The mean values must be in range [0.0, 255.0].
  120. /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
  121. /// The standard deviation values must be in range (0.0, 255.0].
  122. /// \param[in] dtype The output datatype of Tensor.
  123. /// The data type must be "float32" or "float16" (default = "float32").
  124. /// \return Shared pointer to the current TensorOperation.
  125. std::shared_ptr<NormalizePadOperation> NormalizePad(const std::vector<float> &mean, const std::vector<float> &std,
  126. const std::string &dtype = "float32");
  127. /// \brief Function to create a Pad TensorOp
  128. /// \notes Pads the image according to padding parameters
  129. /// \param[in] padding A vector representing the number of pixels to pad the image
  130. /// If vector has one value, it pads all sides of the image with that value.
  131. /// If vector has two values, it pads left and top with the first and
  132. /// right and bottom with the second value.
  133. /// If vector has four values, it pads left, top, right, and bottom with
  134. /// those values respectively.
  135. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  136. /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels. If 3 values are provided,
  137. /// it is used to fill R, G, B channels respectively.
  138. /// \param[in] padding_mode The method of padding (default=BorderType.kConstant)
  139. /// Can be any of
  140. /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric]
  141. /// - BorderType.kConstant, means it fills the border with constant values
  142. /// - BorderType.kEdge, means it pads with the last value on the edge
  143. /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge
  144. /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge
  145. /// \return Shared pointer to the current TensorOp
  146. std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
  147. BorderType padding_mode = BorderType::kConstant);
  148. /// \brief Function to create a RandomAffine TensorOperation.
  149. /// \notes Applies a Random Affine transformation on input image in RGB or Greyscale mode.
  150. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree
  151. /// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes.
  152. /// if size is 2, (min_dx, max_dx, 0, 0)
  153. /// if size is 4, (min_dx, max_dx, min_dy, max_dy)
  154. /// all values are in range [-1, 1]
  155. /// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range.
  156. /// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees
  157. /// vertically and horizontally.
  158. /// if size is 2, (min_shear_x, max_shear_x, 0, 0)
  159. /// if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y)
  160. /// \param[in] interpolation An enum for the mode of interpolation
  161. /// \param[in] fill_value A vector representing the value to fill the area outside the transform
  162. /// in the output image. If 1 value is provided, it is used for all RGB channels.
  163. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  164. /// \return Shared pointer to the current TensorOperation.
  165. std::shared_ptr<RandomAffineOperation> RandomAffine(
  166. const std::vector<float_t> &degrees, const std::vector<float_t> &translate_range = {0.0, 0.0, 0.0, 0.0},
  167. const std::vector<float_t> &scale_range = {1.0, 1.0}, const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0},
  168. InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
  169. const std::vector<uint8_t> &fill_value = {0, 0, 0});
  170. /// \brief Blends an image with its grayscale version with random weights
  171. /// t and 1 - t generated from a given range. If the range is trivial
  172. /// then the weights are determinate and t equals the bound of the interval
  173. /// \param[in] t_lb Lower bound on the range of random weights
  174. /// \param[in] t_ub Upper bound on the range of random weights
  175. /// \return Shared pointer to the current TensorOp
  176. std::shared_ptr<RandomColorOperation> RandomColor(float t_lb, float t_ub);
  177. /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image
  178. /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values
  179. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  180. /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values
  181. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  182. /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values
  183. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  184. /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values
  185. /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
  186. /// Default value is {0, 0}
  187. /// \return Shared pointer to the current TensorOp
  188. std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0},
  189. std::vector<float> contrast = {1.0, 1.0},
  190. std::vector<float> saturation = {1.0, 1.0},
  191. std::vector<float> hue = {0.0, 0.0});
  192. /// \brief Function to create a RandomCrop TensorOperation.
  193. /// \notes Crop the input image at a random location.
  194. /// \param[in] size A vector representing the output size of the cropped image.
  195. /// If size is a single value, a square crop of size (size, size) is returned.
  196. /// If size has 2 values, it should be (height, width).
  197. /// \param[in] padding A vector representing the number of pixels to pad the image
  198. /// If vector has one value, it pads all sides of the image with that value.
  199. /// If vector has two values, it pads left and top with the first and
  200. /// right and bottom with the second value.
  201. /// If vector has four values, it pads left, top, right, and bottom with
  202. /// those values respectively.
  203. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  204. /// the given output size.
  205. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  206. /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels.
  207. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  208. /// \return Shared pointer to the current TensorOperation.
  209. std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  210. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  211. BorderType padding_mode = BorderType::kConstant);
  212. /// \brief Function to create a RandomCropDecodeResize TensorOperation.
  213. /// \notes Equivalent to RandomResizedCrop, but crops before decoding.
  214. /// \param[in] size A vector representing the output size of the cropped image.
  215. /// If size is a single value, a square crop of size (size, size) is returned.
  216. /// If size has 2 values, it should be (height, width).
  217. /// \param[in] scale Range [min, max) of respective size of the
  218. /// original size to be cropped (default=(0.08, 1.0))
  219. /// \param[in] ratio Range [min, max) of aspect ratio to be
  220. /// cropped (default=(3. / 4., 4. / 3.))
  221. /// \param[in] interpolation An enum for the mode of interpolation
  222. /// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10).
  223. /// If exceeded, fall back to use center_crop instead.
  224. /// \return Shared pointer to the current TensorOperation.
  225. std::shared_ptr<RandomCropDecodeResizeOperation> RandomCropDecodeResize(
  226. std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, std::vector<float> ratio = {3. / 4, 4. / 3},
  227. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  228. /// \brief Function to create a RandomCropWithBBox TensorOperation.
  229. /// \notes Crop the input image at a random location and adjust bounding boxes accordingly.
  230. /// \param[in] size A vector representing the output size of the cropped image.
  231. /// If size is a single value, a square crop of size (size, size) is returned.
  232. /// If size has 2 values, it should be (height, width).
  233. /// \param[in] padding A vector representing the number of pixels to pad the image
  234. /// If vector has one value, it pads all sides of the image with that value.
  235. /// If vector has two values, it pads left and top with the first and
  236. /// right and bottom with the second value.
  237. /// If vector has four values, it pads left, top, right, and bottom with
  238. /// those values respectively.
  239. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  240. /// the given output size.
  241. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  242. /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels.
  243. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  244. /// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
  245. /// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
  246. /// \return Shared pointer to the current TensorOperation.
  247. std::shared_ptr<RandomCropWithBBoxOperation> RandomCropWithBBox(std::vector<int32_t> size,
  248. std::vector<int32_t> padding = {0, 0, 0, 0},
  249. bool pad_if_needed = false,
  250. std::vector<uint8_t> fill_value = {0, 0, 0},
  251. BorderType padding_mode = BorderType::kConstant);
  252. /// \brief Function to create a RandomHorizontalFlip TensorOperation.
  253. /// \notes Tensor operation to perform random horizontal flip.
  254. /// \param[in] prob A float representing the probability of flip.
  255. /// \return Shared pointer to the current TensorOperation.
  256. std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5);
  257. /// \brief Function to create a RandomHorizontalFlipWithBBox TensorOperation.
  258. /// \notes Flip the input image horizontally, randomly with a given probability and adjust bounding boxes accordingly.
  259. /// \param[in] prob A float representing the probability of flip.
  260. /// \return Shared pointer to the current TensorOperation.
  261. std::shared_ptr<RandomHorizontalFlipWithBBoxOperation> RandomHorizontalFlipWithBBox(float prob = 0.5);
  262. /// \brief Function to create a RandomPosterize TensorOperation.
  263. /// \notes Tensor operation to perform random posterize.
  264. /// \param[in] bit_range - uint8_t vector representing the minimum and maximum bit in range. (Default={4, 8})
  265. /// \return Shared pointer to the current TensorOperation.
  266. std::shared_ptr<RandomPosterizeOperation> RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
  267. /// \brief Function to create a RandomResize TensorOperation.
  268. /// \notes Resize the input image using a randomly selected interpolation mode.
  269. /// \param[in] size A vector representing the output size of the resized image.
  270. /// If size is a single value, the smaller edge of the image will be resized to this value with
  271. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  272. std::shared_ptr<RandomResizeOperation> RandomResize(std::vector<int32_t> size);
  273. /// \brief Function to create a RandomResizeWithBBox TensorOperation.
  274. /// \notes Resize the input image using a randomly selected interpolation mode and adjust
  275. /// bounding boxes accordingly.
  276. /// \param[in] size A vector representing the output size of the resized image.
  277. /// If size is a single value, the smaller edge of the image will be resized to this value with
  278. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  279. std::shared_ptr<RandomResizeWithBBoxOperation> RandomResizeWithBBox(std::vector<int32_t> size);
  280. /// \brief Function to create a RandomResizedCrop TensorOperation.
  281. /// \notes Crop the input image to a random size and aspect ratio.
  282. /// \param[in] size A vector representing the output size of the cropped image.
  283. /// If size is a single value, a square crop of size (size, size) is returned.
  284. /// If size has 2 values, it should be (height, width).
  285. /// \param[in] scale Range [min, max) of respective size of the original
  286. /// size to be cropped (default=(0.08, 1.0))
  287. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  288. /// (default=(3. / 4., 4. / 3.)).
  289. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear)
  290. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  291. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  292. /// \return Shared pointer to the current TensorOperation.
  293. std::shared_ptr<RandomResizedCropOperation> RandomResizedCrop(
  294. std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, std::vector<float> ratio = {3. / 4., 4. / 3.},
  295. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  296. /// \brief Function to create a RandomResizedCropWithBBox TensorOperation.
  297. /// \notes Crop the input image to a random size and aspect ratio.
  298. /// \param[in] size A vector representing the output size of the cropped image.
  299. /// If size is a single value, a square crop of size (size, size) is returned.
  300. /// If size has 2 values, it should be (height, width).
  301. /// \param[in] scale Range [min, max) of respective size of the original
  302. /// size to be cropped (default=(0.08, 1.0))
  303. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  304. /// (default=(3. / 4., 4. / 3.)).
  305. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear)
  306. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  307. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  308. /// \return Shared pointer to the current TensorOperation.
  309. std::shared_ptr<RandomResizedCropWithBBoxOperation> RandomResizedCropWithBBox(
  310. std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, std::vector<float> ratio = {3. / 4., 4. / 3.},
  311. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  312. /// \brief Function to create a RandomRotation TensorOp
  313. /// \notes Rotates the image according to parameters
  314. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree
  315. /// \param[in] resample An enum for the mode of interpolation
  316. /// \param[in] expand A boolean representing whether the image is expanded after rotation
  317. /// \param[in] center A float vector of size 2, representing the x and y center of rotation.
  318. /// \param[in] fill_value A vector representing the value to fill the area outside the transform
  319. /// in the output image. If 1 value is provided, it is used for all RGB channels.
  320. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  321. /// \return Shared pointer to the current TensorOp
  322. std::shared_ptr<RandomRotationOperation> RandomRotation(
  323. std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
  324. std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0});
  325. /// \brief Function to create a RandomSelectSubpolicy TensorOperation.
  326. /// \notes Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
  327. /// (op, prob), where op is a TensorOp operation and prob is the probability that this op will be applied. Once
  328. /// a sub-policy is selected, each op within the sub-policy will be applied in sequence according to its probability.
  329. /// \param[in] policy Vector of sub-policies to choose from.
  330. /// \return Shared pointer to the current TensorOperation.
  331. std::shared_ptr<RandomSelectSubpolicyOperation> RandomSelectSubpolicy(
  332. std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy);
  333. /// \brief Function to create a RandomSharpness TensorOperation.
  334. /// \notes Tensor operation to perform random sharpness.
  335. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly
  336. /// sample from, to select a degree to adjust sharpness.
  337. /// \return Shared pointer to the current TensorOperation.
  338. std::shared_ptr<RandomSharpnessOperation> RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
  339. /// \brief Function to create a RandomSolarize TensorOperation.
  340. /// \notes Invert pixels randomly within specified range. If min=max, it is a single fixed magnitude operation
  341. /// that inverts all pixels above that threshold.
  342. /// \param[in] threshold A vector with two elements specifying the pixel range to invert.
  343. /// \return Shared pointer to the current TensorOperation.
  344. std::shared_ptr<RandomSolarizeOperation> RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
  345. /// \brief Function to create a RandomVerticalFlip TensorOperation.
  346. /// \notes Tensor operation to perform random vertical flip.
  347. /// \param[in] prob A float representing the probability of flip.
  348. /// \return Shared pointer to the current TensorOperation.
  349. std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5);
  350. /// \brief Function to create a RandomVerticalFlipWithBBox TensorOperation.
  351. /// \notes Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly.
  352. /// \param[in] prob A float representing the probability of flip.
  353. /// \return Shared pointer to the current TensorOperation.
  354. std::shared_ptr<RandomVerticalFlipWithBBoxOperation> RandomVerticalFlipWithBBox(float prob = 0.5);
  355. /// \brief Function to create a Rescale TensorOperation.
  356. /// \notes Tensor operation to rescale the input image.
  357. /// \param[in] rescale Rescale factor.
  358. /// \param[in] shift Shift factor.
  359. /// \return Shared pointer to the current TensorOperation.
  360. std::shared_ptr<RescaleOperation> Rescale(float rescale, float shift);
  361. /// \brief Function to create a ResizeWithBBox TensorOperation.
  362. /// \notes Resize the input image to the given size and adjust bounding boxes accordingly.
  363. /// \param[in] size The output size of the resized image.
  364. /// If size is an integer, smaller edge of the image will be resized to this value with the same image aspect ratio.
  365. /// If size is a sequence of length 2, it should be (height, width).
  366. /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
  367. /// \return Shared pointer to the current TensorOperation.
  368. std::shared_ptr<ResizeWithBBoxOperation> ResizeWithBBox(std::vector<int32_t> size,
  369. InterpolationMode interpolation = InterpolationMode::kLinear);
  370. /// \brief Function to create a RgbaToBgr TensorOperation.
  371. /// \notes Changes the input 4 channel RGBA tensor to 3 channel BGR.
  372. /// \return Shared pointer to the current TensorOperation.
  373. std::shared_ptr<RgbaToBgrOperation> RGBA2BGR();
  374. /// \brief Function to create a RgbaToRgb TensorOperation.
  375. /// \notes Changes the input 4 channel RGBA tensor to 3 channel RGB.
  376. /// \return Shared pointer to the current TensorOperation.
  377. std::shared_ptr<RgbaToRgbOperation> RGBA2RGB();
  378. /// \brief Function to create a SoftDvppDecodeRandomCropResizeJpeg TensorOperation.
  379. /// \notes Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of
  380. /// Ascend series chip DVPP module. The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
  381. /// The input image size should be in range [32*32, 8192*8192].
  382. /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
  383. /// Only images with an even resolution can be output. The output of odd resolution is not supported.
  384. /// \param[in] size A vector representing the output size of the resized image.
  385. /// If size is a single value, smaller edge of the image will be resized to this value with
  386. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  387. /// \return Shared pointer to the current TensorOperation.
  388. std::shared_ptr<SoftDvppDecodeRandomCropResizeJpegOperation> SoftDvppDecodeRandomCropResizeJpeg(
  389. std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, std::vector<float> ratio = {3. / 4., 4. / 3.},
  390. int32_t max_attempts = 10);
  391. /// \brief Function to create a SoftDvppDecodeResizeJpeg TensorOperation.
  392. /// \notes Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series
  393. /// chip DVPP module. It is recommended to use this algorithm in the following scenarios:
  394. /// When training, the DVPP of the Ascend chip is not used,
  395. /// and the DVPP of the Ascend chip is used during inference,
  396. /// and the accuracy of inference is lower than the accuracy of training;
  397. /// and the input image size should be in range [32*32, 8192*8192].
  398. /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
  399. /// Only images with an even resolution can be output. The output of odd resolution is not supported.
  400. /// \param[in] size A vector representing the output size of the resized image.
  401. /// If size is a single value, smaller edge of the image will be resized to this value with
  402. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  403. /// \return Shared pointer to the current TensorOperation.
  404. std::shared_ptr<SoftDvppDecodeResizeJpegOperation> SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);
  405. /// \brief Function to create a SwapRedBlue TensorOp
  406. /// \notes Swaps the red and blue channels in image
  407. /// \return Shared pointer to the current TensorOp
  408. std::shared_ptr<SwapRedBlueOperation> SwapRedBlue();
  409. /// \brief Function to create a UniformAugment TensorOperation.
  410. /// \notes Tensor operation to perform randomly selected augmentation.
  411. /// \param[in] transforms A vector of TensorOperation transforms.
  412. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied.
  413. /// \return Shared pointer to the current TensorOperation.
  414. std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> transforms,
  415. int32_t num_ops = 2);
  416. } // namespace vision
  417. } // namespace dataset
  418. } // namespace mindspore
  419. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_