You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

vision.h 30 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  18. #include <memory>
  19. #include <vector>
  20. #include "minddata/dataset/core/constants.h"
  21. #include "minddata/dataset/include/transforms.h"
  22. #include "minddata/dataset/util/status.h"
  23. namespace mindspore {
  24. namespace dataset {
  25. namespace api {
  26. // Transform operations for performing computer vision.
  27. namespace vision {
  // Forward declarations of the TensorOperation subclasses returned by the
  // factory functions in this header (kept in alphabetical order).
  class CenterCropOperation;
  class CropOperation;
  class CutMixBatchOperation;
  class CutOutOperation;
  class DecodeOperation;
  class HwcToChwOperation;
  class MixUpBatchOperation;
  class NormalizeOperation;
  class PadOperation;
  class RandomAffineOperation;
  class RandomColorOperation;
  class RandomColorAdjustOperation;
  class RandomCropOperation;
  class RandomCropDecodeResizeOperation;
  class RandomHorizontalFlipOperation;
  class RandomPosterizeOperation;
  class RandomResizedCropOperation;
  class RandomRotationOperation;
  class RandomSharpnessOperation;
  class RandomSolarizeOperation;
  class RandomVerticalFlipOperation;
  class RescaleOperation;
  class ResizeOperation;
  class RgbaToBgrOperation;
  class RgbaToRgbOperation;
  class SwapRedBlueOperation;
  class UniformAugOperation;

  56. /// \brief Function to create a CenterCrop TensorOperation.
  57. /// \notes Crops the input image at the center to the given size.
  58. /// \param[in] size A vector representing the output size of the cropped image.
  59. /// If size is a single value, a square crop of size (size, size) is returned.
  60. /// If size has 2 values, it should be (height, width).
  61. /// \return Shared pointer to the current TensorOperation.
  62. std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size);
  63. /// \brief Function to create a Crop TensorOp
  64. /// \notes Crop an image based on location and crop size
  65. /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor}
  66. /// \param[in] size Size of the cropped area. Must be a vector of two values, in the form of {height, width}
  67. /// \return Shared pointer to the current TensorOp
  68. std::shared_ptr<CropOperation> Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size);
  69. /// \brief Function to apply CutMix on a batch of images
  70. /// \notes Masks a random section of each image with the corresponding part of another randomly
  71. /// selected image in that batch
  72. /// \param[in] image_batch_format The format of the batch
  73. /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0)
  74. /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0)
  75. /// \return Shared pointer to the current TensorOp
  76. std::shared_ptr<CutMixBatchOperation> CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0,
  77. float prob = 1.0);
  78. /// \brief Function to create a CutOut TensorOp
  79. /// \notes Randomly cut (mask) out a given number of square patches from the input image
  80. /// \param[in] length Integer representing the side length of each square patch
  81. /// \param[in] num_patches Integer representing the number of patches to be cut out of an image
  82. /// \return Shared pointer to the current TensorOp
  83. std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1);
  84. /// \brief Function to create a Decode TensorOperation.
  85. /// \notes Decode the input image in RGB mode.
  86. /// \param[in] rgb A boolean of whether to decode in RGB mode or not.
  87. /// \return Shared pointer to the current TensorOperation.
  88. std::shared_ptr<DecodeOperation> Decode(bool rgb = true);
  89. /// \brief Function to create a HwcToChw TensorOperation.
  90. /// \notes Transpose the input image; shape (H, W, C) to shape (C, H, W).
  91. /// \return Shared pointer to the current TensorOperation.
  92. std::shared_ptr<HwcToChwOperation> HWC2CHW();
  93. /// \brief Function to create a MixUpBatch TensorOperation.
  94. /// \notes Apply MixUp transformation on an input batch of images and labels. The labels must be in
  95. /// one-hot format and Batch must be called before calling this function.
  96. /// \param[in] alpha hyperparameter of beta distribution (default = 1.0)
  97. /// \return Shared pointer to the current TensorOperation.
  98. std::shared_ptr<MixUpBatchOperation> MixUpBatch(float alpha = 1);
  99. /// \brief Function to create a Normalize TensorOperation.
  100. /// \notes Normalize the input image with respect to mean and standard deviation.
  101. /// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
  102. /// The mean values must be in range (0.0, 255.0].
  103. /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
  104. /// The standard deviation values must be in range (0.0, 255.0]
  105. /// \return Shared pointer to the current TensorOperation.
  106. std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std);
  107. /// \brief Function to create a Pad TensorOp
  108. /// \notes Pads the image according to padding parameters
  109. /// \param[in] padding A vector representing the number of pixels to pad the image
  110. /// If vector has one value, it pads all sides of the image with that value
  111. /// If vector has two values, it pads left and right with the first and
  112. /// top and bottom with the second value
  113. /// If vector has four values, it pads left, top, right, and bottom with
  114. /// those values respectively
  115. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  116. /// BorderType.kConstant. If 3 values are provided,
  117. /// it is used to fill R, G, B channels respectively
  118. /// \param[in] padding_mode The method of padding (default=BorderType.kConstant)
  119. /// Can be any of
  120. /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric]
  121. /// - BorderType.kConstant, means it fills the border with constant values
  122. /// - BorderType.kEdge, means it pads with the last value on the edge
  123. /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge
  124. /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge
  125. /// \return Shared pointer to the current TensorOp
  126. std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
  127. BorderType padding_mode = BorderType::kConstant);
  128. /// \brief Function to create a RandomAffine TensorOperation.
  129. /// \notes Applies a Random Affine transformation on input image in RGB or Greyscale mode.
  130. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree
  131. /// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes.
  132. /// if size is 2, (min_dx, max_dx, 0, 0)
  133. /// if size is 4, (min_dx, max_dx, min_dy, max_dy)
  134. /// all values are in range [-1, 1]
  135. /// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range.
  136. /// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees
  137. /// vertically and horizontally.
  138. /// if size is 2, (min_shear_x, max_shear_x, 0, 0)
  139. /// if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y)
  140. /// \param[in] interpolation An enum for the mode of interpolation
  141. /// \param[in] fill_value A uint8_t vector of size 3, representing the pixel intensity of the borders, it is used to
  142. /// fill R, G, B channels respectively.
  143. /// \return Shared pointer to the current TensorOperation.
  144. std::shared_ptr<RandomAffineOperation> RandomAffine(
  145. const std::vector<float_t> &degrees, const std::vector<float_t> &translate_range = {0.0, 0.0, 0.0, 0.0},
  146. const std::vector<float_t> &scale_range = {1.0, 1.0}, const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0},
  147. InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
  148. const std::vector<uint8_t> &fill_value = {0, 0, 0});
  149. /// \brief Blends an image with its grayscale version with random weights
  150. /// t and 1 - t generated from a given range. If the range is trivial
  151. /// then the weights are determinate and t equals the bound of the interval
  152. /// \param[in] t_lb Lower bound on the range of random weights
  153. /// \param[in] t_lb Upper bound on the range of random weights
  154. /// \return Shared pointer to the current TensorOp
  155. std::shared_ptr<RandomColorOperation> RandomColor(float t_lb, float t_ub);
  156. /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image
  157. /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values
  158. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  159. /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values
  160. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  161. /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values
  162. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  163. /// \param[in] hue Brightness adjustment factor. Must be a vector of one or two values
  164. /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
  165. /// Default value is {0, 0}
  166. /// \return Shared pointer to the current TensorOp
  167. std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0},
  168. std::vector<float> contrast = {1.0, 1.0},
  169. std::vector<float> saturation = {1.0, 1.0},
  170. std::vector<float> hue = {0.0, 0.0});
  171. /// \brief Function to create a RandomCrop TensorOperation.
  172. /// \notes Crop the input image at a random location.
  173. /// \param[in] size A vector representing the output size of the cropped image.
  174. /// If size is a single value, a square crop of size (size, size) is returned.
  175. /// If size has 2 values, it should be (height, width).
  176. /// \param[in] padding A vector with the value of pixels to pad the image. If 4 values are provided,
  177. /// it pads the left, top, right and bottom respectively.
  178. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  179. /// the given output size.
  180. /// \param[in] fill_value A vector representing the pixel intensity of the borders, it is used to
  181. /// fill R, G, B channels respectively.
  182. /// \return Shared pointer to the current TensorOperation.
  183. std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  184. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  185. BorderType padding_mode = BorderType::kConstant);
  186. /// \brief Function to create a RandomCropDecodeResize TensorOperation.
  187. /// \notes Equivalent to RandomResizedCrop, but crops before decodes.
  188. /// \param[in] size A vector representing the output size of the cropped image.
  189. /// If size is a single value, a square crop of size (size, size) is returned.
  190. /// If size has 2 values, it should be (height, width).
  191. /// \param[in] scale Range [min, max) of respective size of the
  192. /// original size to be cropped (default=(0.08, 1.0))
  193. /// \param[in] ratio Range [min, max) of aspect ratio to be
  194. /// cropped (default=(3. / 4., 4. / 3.))
  195. /// \param[in] interpolation An enum for the mode of interpolation
  196. /// \param[in] The maximum number of attempts to propose a valid crop_area (default=10).
  197. /// If exceeded, fall back to use center_crop instead.
  198. /// \return Shared pointer to the current TensorOperation.
  199. std::shared_ptr<RandomCropDecodeResizeOperation> RandomCropDecodeResize(
  200. std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, std::vector<float> ratio = {3. / 4, 4. / 3},
  201. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  202. /// \brief Function to create a RandomHorizontalFlip TensorOperation.
  203. /// \notes Tensor operation to perform random horizontal flip.
  204. /// \param[in] prob A float representing the probability of flip.
  205. /// \return Shared pointer to the current TensorOperation.
  206. std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5);
  207. /// \brief Function to create a RandomPosterize TensorOperation.
  208. /// \notes Tensor operation to perform random posterize.
  209. /// \param[in] bit_range - uint8_t vector representing the minimum and maximum bit in range. (Default={4, 8})
  210. /// \return Shared pointer to the current TensorOperation.
  211. std::shared_ptr<RandomPosterizeOperation> RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
  212. /// \brief Function to create a RandomResizedCrop TensorOperation.
  213. /// \notes Crop the input image to a random size and aspect ratio.
  214. /// \param[in] size A vector representing the output size of the cropped image.
  215. /// If size is a single value, a square crop of size (size, size) is returned.
  216. /// If size has 2 values, it should be (height, width).
  217. /// \param[in] scale Range [min, max) of respective size of the original
  218. /// size to be cropped (default=(0.08, 1.0))
  219. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  220. /// (default=(3. / 4., 4. / 3.)).
  221. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear)
  222. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  223. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  224. /// \return Shared pointer to the current TensorOperation.
  225. std::shared_ptr<RandomResizedCropOperation> RandomResizedCrop(
  226. std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0}, std::vector<float> ratio = {3. / 4., 4. / 3.},
  227. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  228. /// \brief Function to create a RandomRotation TensorOp
  229. /// \notes Rotates the image according to parameters
  230. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree
  231. /// \param[in] resample An enum for the mode of interpolation
  232. /// \param[in] expand A boolean representing whether the image is expanded after rotation
  233. /// \param[in] center A float vector of size 2, representing the x and y center of rotation.
  234. /// \param[in] fill_value A uint8_t vector of size 3, representing the rgb value of the fill color
  235. /// \return Shared pointer to the current TensorOp
  236. std::shared_ptr<RandomRotationOperation> RandomRotation(
  237. std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
  238. std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0});
  239. /// \brief Function to create a RandomSharpness TensorOperation.
  240. /// \notes Tensor operation to perform random sharpness.
  241. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly
  242. /// sample from, to select a degree to adjust sharpness.
  243. /// \return Shared pointer to the current TensorOperation.
  244. std::shared_ptr<RandomSharpnessOperation> RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
  245. /// \brief Function to create a RandomSolarize TensorOperation.
  246. /// \notes Invert pixels randomly within specified range. If min=max, it is a single fixed magnitude operation
  247. /// to inverts all pixel above that threshold
  248. /// \param[in] threshold A vector with two elements specifying the pixel range to invert.
  249. /// \return Shared pointer to the current TensorOperation.
  250. std::shared_ptr<RandomSolarizeOperation> RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
  251. /// \brief Function to create a RandomVerticalFlip TensorOperation.
  252. /// \notes Tensor operation to perform random vertical flip.
  253. /// \param[in] prob A float representing the probability of flip.
  254. /// \return Shared pointer to the current TensorOperation.
  255. std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5);
  256. /// \brief Function to create a RescaleOperation TensorOperation.
  257. /// \notes Tensor operation to rescale the input image.
  258. /// \param[in] rescale Rescale factor.
  259. /// \param[in] shift Shift factor.
  260. /// \return Shared pointer to the current TensorOperation.
  261. std::shared_ptr<RescaleOperation> Rescale(float rescale, float shift);
  262. /// \brief Function to create a Resize TensorOperation.
  263. /// \notes Resize the input image to the given size.
  264. /// \param[in] size A vector representing the output size of the resized image.
  265. /// If size is a single value, the image will be resized to this value with
  266. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  267. /// \param[in] interpolation An enum for the mode of interpolation
  268. /// \return Shared pointer to the current TensorOperation.
  269. std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size,
  270. InterpolationMode interpolation = InterpolationMode::kLinear);
  271. /// \brief Function to create a RgbaToBgr TensorOperation.
  272. /// \notes Changes the input 4 channel RGBA tensor to 3 channel BGR.
  273. /// \return Shared pointer to the current TensorOperation.
  274. std::shared_ptr<RgbaToBgrOperation> RGBA2BGR();
  275. /// \brief Function to create a RgbaToRgb TensorOperation.
  276. /// \notes Changes the input 4 channel RGBA tensor to 3 channel RGB.
  277. /// \return Shared pointer to the current TensorOperation.
  278. std::shared_ptr<RgbaToRgbOperation> RGBA2RGB();
  279. /// \brief Function to create a SwapRedBlue TensorOp
  280. /// \notes Swaps the red and blue channels in image
  281. /// \return Shared pointer to the current TensorOp
  282. std::shared_ptr<SwapRedBlueOperation> SwapRedBlue();
  283. /// \brief Function to create a UniformAugment TensorOperation.
  284. /// \notes Tensor operation to perform randomly selected augmentation.
  285. /// \param[in] transforms A vector of TensorOperation transforms.
  286. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied.
  287. /// \return Shared pointer to the current TensorOperation.
  288. std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> transforms,
  289. int32_t num_ops = 2);
  290. /* ####################################### Derived TensorOperation classes ################################# */
  291. class CenterCropOperation : public TensorOperation {
  292. public:
  293. explicit CenterCropOperation(std::vector<int32_t> size);
  294. ~CenterCropOperation() = default;
  295. std::shared_ptr<TensorOp> Build() override;
  296. Status ValidateParams() override;
  297. private:
  298. std::vector<int32_t> size_;
  299. };
  300. class CropOperation : public TensorOperation {
  301. public:
  302. CropOperation(std::vector<int32_t> coordinates, std::vector<int32_t> size);
  303. ~CropOperation() = default;
  304. std::shared_ptr<TensorOp> Build() override;
  305. Status ValidateParams() override;
  306. private:
  307. std::vector<int32_t> coordinates_;
  308. std::vector<int32_t> size_;
  309. };
  310. class CutMixBatchOperation : public TensorOperation {
  311. public:
  312. explicit CutMixBatchOperation(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
  313. ~CutMixBatchOperation() = default;
  314. std::shared_ptr<TensorOp> Build() override;
  315. Status ValidateParams() override;
  316. private:
  317. float alpha_;
  318. float prob_;
  319. ImageBatchFormat image_batch_format_;
  320. };
  321. class CutOutOperation : public TensorOperation {
  322. public:
  323. explicit CutOutOperation(int32_t length, int32_t num_patches = 1);
  324. ~CutOutOperation() = default;
  325. std::shared_ptr<TensorOp> Build() override;
  326. Status ValidateParams() override;
  327. private:
  328. int32_t length_;
  329. int32_t num_patches_;
  330. ImageBatchFormat image_batch_format_;
  331. };
  332. class DecodeOperation : public TensorOperation {
  333. public:
  334. explicit DecodeOperation(bool rgb = true);
  335. ~DecodeOperation() = default;
  336. std::shared_ptr<TensorOp> Build() override;
  337. Status ValidateParams() override;
  338. private:
  339. bool rgb_;
  340. };
  341. class HwcToChwOperation : public TensorOperation {
  342. public:
  343. ~HwcToChwOperation() = default;
  344. std::shared_ptr<TensorOp> Build() override;
  345. Status ValidateParams() override;
  346. };
  347. class MixUpBatchOperation : public TensorOperation {
  348. public:
  349. explicit MixUpBatchOperation(float alpha = 1);
  350. ~MixUpBatchOperation() = default;
  351. std::shared_ptr<TensorOp> Build() override;
  352. Status ValidateParams() override;
  353. private:
  354. float alpha_;
  355. };
  356. class NormalizeOperation : public TensorOperation {
  357. public:
  358. NormalizeOperation(std::vector<float> mean, std::vector<float> std);
  359. ~NormalizeOperation() = default;
  360. std::shared_ptr<TensorOp> Build() override;
  361. Status ValidateParams() override;
  362. private:
  363. std::vector<float> mean_;
  364. std::vector<float> std_;
  365. };
  366. class PadOperation : public TensorOperation {
  367. public:
  368. PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
  369. BorderType padding_mode = BorderType::kConstant);
  370. ~PadOperation() = default;
  371. std::shared_ptr<TensorOp> Build() override;
  372. Status ValidateParams() override;
  373. private:
  374. std::vector<int32_t> padding_;
  375. std::vector<uint8_t> fill_value_;
  376. BorderType padding_mode_;
  377. };
  378. class RandomAffineOperation : public TensorOperation {
  379. public:
  380. RandomAffineOperation(const std::vector<float_t> &degrees, const std::vector<float_t> &translate_range = {0.0, 0.0},
  381. const std::vector<float_t> &scale_range = {1.0, 1.0},
  382. const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0},
  383. InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
  384. const std::vector<uint8_t> &fill_value = {0, 0, 0});
  385. ~RandomAffineOperation() = default;
  386. std::shared_ptr<TensorOp> Build() override;
  387. Status ValidateParams() override;
  388. private:
  389. std::vector<float_t> degrees_; // min_degree, max_degree
  390. std::vector<float_t> translate_range_; // maximum x translation percentage, maximum y translation percentage
  391. std::vector<float_t> scale_range_; // min_scale, max_scale
  392. std::vector<float_t> shear_ranges_; // min_x_shear, max_x_shear, min_y_shear, max_y_shear
  393. InterpolationMode interpolation_;
  394. std::vector<uint8_t> fill_value_;
  395. };
  396. class RandomColorOperation : public TensorOperation {
  397. public:
  398. RandomColorOperation(float t_lb, float t_ub);
  399. ~RandomColorOperation() = default;
  400. std::shared_ptr<TensorOp> Build() override;
  401. Status ValidateParams() override;
  402. private:
  403. float t_lb_;
  404. float t_ub_;
  405. };
  406. class RandomColorAdjustOperation : public TensorOperation {
  407. public:
  408. RandomColorAdjustOperation(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
  409. std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
  410. ~RandomColorAdjustOperation() = default;
  411. std::shared_ptr<TensorOp> Build() override;
  412. Status ValidateParams() override;
  413. private:
  414. std::vector<float> brightness_;
  415. std::vector<float> contrast_;
  416. std::vector<float> saturation_;
  417. std::vector<float> hue_;
  418. };
  419. class RandomCropOperation : public TensorOperation {
  420. public:
  421. RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  422. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  423. BorderType padding_mode = BorderType::kConstant);
  424. ~RandomCropOperation() = default;
  425. std::shared_ptr<TensorOp> Build() override;
  426. Status ValidateParams() override;
  427. private:
  428. std::vector<int32_t> size_;
  429. std::vector<int32_t> padding_;
  430. bool pad_if_needed_;
  431. std::vector<uint8_t> fill_value_;
  432. BorderType padding_mode_;
  433. };
  434. class RandomCropDecodeResizeOperation : public TensorOperation {
  435. public:
  436. RandomCropDecodeResizeOperation(std::vector<int32_t> size, std::vector<float> scale, std::vector<float> ratio,
  437. InterpolationMode interpolation, int32_t max_attempts);
  438. ~RandomCropDecodeResizeOperation() = default;
  439. std::shared_ptr<TensorOp> Build() override;
  440. Status ValidateParams() override;
  441. private:
  442. std::vector<int32_t> size_;
  443. std::vector<float> scale_;
  444. std::vector<float> ratio_;
  445. InterpolationMode interpolation_;
  446. int32_t max_attempts_;
  447. };
  448. class RandomHorizontalFlipOperation : public TensorOperation {
  449. public:
  450. explicit RandomHorizontalFlipOperation(float probability = 0.5);
  451. ~RandomHorizontalFlipOperation() = default;
  452. std::shared_ptr<TensorOp> Build() override;
  453. Status ValidateParams() override;
  454. private:
  455. float probability_;
  456. };
  457. class RandomPosterizeOperation : public TensorOperation {
  458. public:
  459. explicit RandomPosterizeOperation(const std::vector<uint8_t> &bit_range = {4, 8});
  460. ~RandomPosterizeOperation() = default;
  461. std::shared_ptr<TensorOp> Build() override;
  462. Status ValidateParams() override;
  463. private:
  464. std::vector<uint8_t> bit_range_;
  465. };
  466. class RandomResizedCropOperation : public TensorOperation {
  467. public:
  468. explicit RandomResizedCropOperation(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  469. std::vector<float> ratio = {3. / 4., 4. / 3.},
  470. InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
  471. int32_t max_attempts = 10);
  472. ~RandomResizedCropOperation() = default;
  473. std::shared_ptr<TensorOp> Build() override;
  474. Status ValidateParams() override;
  475. private:
  476. std::vector<int32_t> size_;
  477. std::vector<float> scale_;
  478. std::vector<float> ratio_;
  479. InterpolationMode interpolation_;
  480. int32_t max_attempts_;
  481. };
  482. class RandomRotationOperation : public TensorOperation {
  483. public:
  484. RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand,
  485. std::vector<float> center, std::vector<uint8_t> fill_value);
  486. ~RandomRotationOperation() = default;
  487. std::shared_ptr<TensorOp> Build() override;
  488. Status ValidateParams() override;
  489. private:
  490. std::vector<float> degrees_;
  491. InterpolationMode interpolation_mode_;
  492. std::vector<float> center_;
  493. bool expand_;
  494. std::vector<uint8_t> fill_value_;
  495. };
  496. class RandomSharpnessOperation : public TensorOperation {
  497. public:
  498. explicit RandomSharpnessOperation(std::vector<float> degrees = {0.1, 1.9});
  499. ~RandomSharpnessOperation() = default;
  500. std::shared_ptr<TensorOp> Build() override;
  501. Status ValidateParams() override;
  502. private:
  503. std::vector<float> degrees_;
  504. };
  505. class RandomSolarizeOperation : public TensorOperation {
  506. public:
  507. explicit RandomSolarizeOperation(std::vector<uint8_t> threshold);
  508. ~RandomSolarizeOperation() = default;
  509. std::shared_ptr<TensorOp> Build() override;
  510. Status ValidateParams() override;
  511. private:
  512. std::vector<uint8_t> threshold_;
  513. };
  514. class RandomVerticalFlipOperation : public TensorOperation {
  515. public:
  516. explicit RandomVerticalFlipOperation(float probability = 0.5);
  517. ~RandomVerticalFlipOperation() = default;
  518. std::shared_ptr<TensorOp> Build() override;
  519. Status ValidateParams() override;
  520. private:
  521. float probability_;
  522. };
  523. class RescaleOperation : public TensorOperation {
  524. public:
  525. explicit RescaleOperation(float rescale, float shift);
  526. ~RescaleOperation() = default;
  527. std::shared_ptr<TensorOp> Build() override;
  528. Status ValidateParams() override;
  529. private:
  530. float rescale_;
  531. float shift_;
  532. };
  533. class ResizeOperation : public TensorOperation {
  534. public:
  535. explicit ResizeOperation(std::vector<int32_t> size,
  536. InterpolationMode interpolation_mode = InterpolationMode::kLinear);
  537. ~ResizeOperation() = default;
  538. std::shared_ptr<TensorOp> Build() override;
  539. Status ValidateParams() override;
  540. private:
  541. std::vector<int32_t> size_;
  542. InterpolationMode interpolation_;
  543. };
  544. class RgbaToBgrOperation : public TensorOperation {
  545. public:
  546. RgbaToBgrOperation();
  547. ~RgbaToBgrOperation() = default;
  548. std::shared_ptr<TensorOp> Build() override;
  549. Status ValidateParams() override;
  550. };
  551. class RgbaToRgbOperation : public TensorOperation {
  552. public:
  553. RgbaToRgbOperation();
  554. ~RgbaToRgbOperation() = default;
  555. std::shared_ptr<TensorOp> Build() override;
  556. Status ValidateParams() override;
  557. };
  558. class SwapRedBlueOperation : public TensorOperation {
  559. public:
  560. SwapRedBlueOperation();
  561. ~SwapRedBlueOperation() = default;
  562. std::shared_ptr<TensorOp> Build() override;
  563. Status ValidateParams() override;
  564. };
  565. class UniformAugOperation : public TensorOperation {
  566. public:
  567. explicit UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> transforms, int32_t num_ops = 2);
  568. ~UniformAugOperation() = default;
  569. std::shared_ptr<TensorOp> Build() override;
  570. Status ValidateParams() override;
  571. private:
  572. std::vector<std::shared_ptr<TensorOperation>> transforms_;
  573. int32_t num_ops_;
  574. };
  575. } // namespace vision
  576. } // namespace api
  577. } // namespace dataset
  578. } // namespace mindspore
  579. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_