You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

vision.h 39 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  18. #include <map>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "include/api/dual_abi_helper.h"
  24. #include "include/api/status.h"
  25. #include "minddata/dataset/include/constants.h"
  26. #include "minddata/dataset/include/transforms.h"
  27. #include "minddata/dataset/include/vision_lite.h"
  28. namespace mindspore {
  29. namespace dataset {
  30. class TensorOperation;
  31. // Transform operations for performing computer vision.
  32. namespace vision {
  33. /// \brief AutoContrast TensorTransform.
  34. /// \notes Apply automatic contrast to the input image.
  35. class AutoContrast : public TensorTransform {
  36. public:
  37. /// \brief Constructor.
  38. /// \param[in] cutoff Percent of pixels to cut off from the histogram; the valid range is 0 to 100 (default=0.0).
  39. /// \param[in] ignore Pixel values to ignore (default={}, an empty vector).
  40. explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});
  41. /// \brief Destructor.
  42. ~AutoContrast() = default;
  43. protected:
  44. /// \brief Convert this TensorTransform object into a TensorOperation object.
  45. /// \return Shared pointer to the resulting TensorOperation object.
  46. std::shared_ptr<TensorOperation> Parse() override;
  47. private:
  48. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  49. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  50. };
  51. /// \brief BoundingBoxAugment TensorTransform.
  52. /// \notes Apply a given image transform to a random selection of bounding box regions of a given image.
  53. class BoundingBoxAugment : public TensorTransform {
  54. public:
  55. /// \brief Constructor taking the transform as a raw pointer.
  56. /// \param[in] transform Raw pointer to a TensorTransform operation.
  57. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  58. explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);
  59. /// \brief Constructor taking the transform as a shared pointer.
  60. /// \param[in] transform Smart pointer to a TensorTransform operation.
  61. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  62. explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);
  63. /// \brief Constructor taking the transform as a reference wrapper.
  64. /// \param[in] transform std::reference_wrapper referring to a TensorTransform operation.
  65. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  66. explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3);
  67. /// \brief Destructor.
  68. ~BoundingBoxAugment() = default;
  69. protected:
  70. /// \brief Convert this TensorTransform object into a TensorOperation object.
  71. /// \return Shared pointer to the resulting TensorOperation object.
  72. std::shared_ptr<TensorOperation> Parse() override;
  73. private:
  74. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  75. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  76. };
  77. /// \brief CutMixBatch TensorTransform, which applies CutMix on a batch of images.
  78. /// \notes Mask a random section of each image with the corresponding part of another randomly
  79. /// selected image in that batch.
  80. class CutMixBatch : public TensorTransform {
  81. public:
  82. /// \brief Constructor.
  83. /// \param[in] image_batch_format The format of the batch.
  84. /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
  85. /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0).
  86. explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
  87. /// \brief Destructor.
  88. ~CutMixBatch() = default;
  89. protected:
  90. /// \brief Convert this TensorTransform object into a TensorOperation object.
  91. /// \return Shared pointer to the resulting TensorOperation object.
  92. std::shared_ptr<TensorOperation> Parse() override;
  93. private:
  94. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  95. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  96. };
  97. /// \brief CutOut TensorTransform.
  98. /// \notes Randomly cut (mask) out a given number of square patches from the input image.
  99. class CutOut : public TensorTransform {
  100. public:
  101. /// \brief Constructor.
  102. /// \param[in] length Integer representing the side length of each square patch.
  103. /// \param[in] num_patches Integer representing the number of patches to be cut out of an image (default=1).
  104. explicit CutOut(int32_t length, int32_t num_patches = 1);
  105. /// \brief Destructor.
  106. ~CutOut() = default;
  107. protected:
  108. /// \brief Convert this TensorTransform object into a TensorOperation object.
  109. /// \return Shared pointer to the resulting TensorOperation object.
  110. std::shared_ptr<TensorOperation> Parse() override;
  111. private:
  112. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  113. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  114. };
  115. /// \brief Equalize TensorTransform.
  116. /// \notes Apply histogram equalization to the input image.
  117. class Equalize : public TensorTransform {
  118. public:
  119. /// \brief Constructor; takes no parameters.
  120. Equalize();
  121. /// \brief Destructor.
  122. ~Equalize() = default;
  123. protected:
  124. /// \brief Convert this TensorTransform object into a TensorOperation object.
  125. /// \return Shared pointer to the resulting TensorOperation object.
  126. std::shared_ptr<TensorOperation> Parse() override;
  127. };
  128. /// \brief HWC2CHW TensorTransform.
  129. /// \notes Transpose the input image from shape (H, W, C) to shape (C, H, W).
  130. class HWC2CHW : public TensorTransform {
  131. public:
  132. /// \brief Constructor; takes no parameters.
  133. HWC2CHW();
  134. /// \brief Destructor.
  135. ~HWC2CHW() = default;
  136. protected:
  137. /// \brief Convert this TensorTransform object into a TensorOperation object.
  138. /// \return Shared pointer to the resulting TensorOperation object.
  139. std::shared_ptr<TensorOperation> Parse() override;
  140. };
  141. /// \brief Invert TensorTransform.
  142. /// \notes Apply inversion to the input image in RGB mode.
  143. class Invert : public TensorTransform {
  144. public:
  145. /// \brief Constructor; takes no parameters.
  146. Invert();
  147. /// \brief Destructor.
  148. ~Invert() = default;
  149. protected:
  150. /// \brief Convert this TensorTransform object into a TensorOperation object.
  151. /// \return Shared pointer to the resulting TensorOperation object.
  152. std::shared_ptr<TensorOperation> Parse() override;
  153. };
  154. /// \brief MixUpBatch TensorTransform.
  155. /// \notes Apply the MixUp transformation to an input batch of images and labels. The labels must be in
  156. /// one-hot format, and Batch must be called before calling this function.
  157. class MixUpBatch : public TensorTransform {
  158. public:
  159. /// \brief Constructor.
  160. /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
  161. explicit MixUpBatch(float alpha = 1);
  162. /// \brief Destructor.
  163. ~MixUpBatch() = default;
  164. protected:
  165. /// \brief Convert this TensorTransform object into a TensorOperation object.
  166. /// \return Shared pointer to the resulting TensorOperation object.
  167. std::shared_ptr<TensorOperation> Parse() override;
  168. private:
  169. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  170. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  171. };
  172. /// \brief NormalizePad TensorTransform.
  173. /// \notes Normalize the input image with respect to mean and standard deviation and pad an extra
  174. /// channel with value zero.
  175. class NormalizePad : public TensorTransform {
  176. public:
  177. /// \brief Constructor.
  178. /// \param[in] mean A vector of mean values for each channel, w.r.t. channel order.
  179. /// The mean values must be in range [0.0, 255.0].
  180. /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
  181. /// The standard deviation values must be in range (0.0, 255.0].
  182. /// \param[in] dtype The output datatype of Tensor.
  183. /// The output datatype must be "float32" or "float16" (default = "float32").
  184. explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std,
  185. const std::string &dtype = "float32")
  186. : NormalizePad(mean, std, StringToChar(dtype)) {}  // Delegates to the std::vector<char> overload below.
  187. explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype);  // Same parameters, with dtype passed as std::vector<char>.
  188. /// \brief Destructor.
  189. ~NormalizePad() = default;
  190. protected:
  191. /// \brief Convert this TensorTransform object into a TensorOperation object.
  192. /// \return Shared pointer to the resulting TensorOperation object.
  193. std::shared_ptr<TensorOperation> Parse() override;
  194. private:
  195. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  196. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  197. };
  198. /// \brief Pad TensorTransform.
  199. /// \notes Pad the image according to the padding parameters.
  200. class Pad : public TensorTransform {
  201. public:
  202. /// \brief Constructor.
  203. /// \param[in] padding A vector representing the number of pixels to pad the image.
  204. /// If vector has one value, it pads all sides of the image with that value.
  205. /// If vector has two values, it pads left and top with the first and
  206. /// right and bottom with the second value.
  207. /// If vector has four values, it pads left, top, right, and bottom with
  208. /// those values respectively.
  209. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  210. /// BorderType.kConstant (default={0}). If 1 value is provided, it is used for all RGB channels. If 3 values
  211. /// are provided, they are used to fill R, G, B channels respectively.
  212. /// \param[in] padding_mode The method of padding (default=BorderType.kConstant).
  213. /// Can be any of
  214. /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric]
  215. /// - BorderType.kConstant, means it fills the border with constant values
  216. /// - BorderType.kEdge, means it pads with the last value on the edge
  217. /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge
  218. /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge
  219. explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
  220. BorderType padding_mode = BorderType::kConstant);
  221. /// \brief Destructor.
  222. ~Pad() = default;
  223. protected:
  224. /// \brief Convert this TensorTransform object into a TensorOperation object.
  225. /// \return Shared pointer to the resulting TensorOperation object.
  226. std::shared_ptr<TensorOperation> Parse() override;
  227. private:
  228. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  229. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  230. };
  231. /// \brief RandomColor TensorTransform.
  232. /// \notes Blend an image with its grayscale version using random weights t and 1 - t generated from a given range.
  233. /// If the range is trivial, the weights are determinate and t equals the bound of the interval.
  234. class RandomColor : public TensorTransform {
  235. public:
  236. /// \brief Constructor.
  237. /// \param[in] t_lb Lower bound on the range of random weights.
  238. /// \param[in] t_ub Upper bound on the range of random weights.
  239. explicit RandomColor(float t_lb, float t_ub);
  240. /// \brief Destructor.
  241. ~RandomColor() = default;
  242. protected:
  243. /// \brief Convert this TensorTransform object into a TensorOperation object.
  244. /// \return Shared pointer to the resulting TensorOperation object.
  245. std::shared_ptr<TensorOperation> Parse() override;
  246. private:
  247. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  248. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  249. };
  250. /// \brief RandomColorAdjust TensorTransform.
  251. /// \notes Randomly adjust the brightness, contrast, saturation, and hue of the input image.
  252. class RandomColorAdjust : public TensorTransform {
  253. public:
  254. /// \brief Constructor.
  255. /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values;
  256. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}.
  257. /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values;
  258. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}.
  259. /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values;
  260. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}.
  261. /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values;
  262. /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5.
  263. /// Default value is {0, 0}.
  264. explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
  265. std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
  266. /// \brief Destructor.
  267. ~RandomColorAdjust() = default;
  268. protected:
  269. /// \brief Convert this TensorTransform object into a TensorOperation object.
  270. /// \return Shared pointer to the resulting TensorOperation object.
  271. std::shared_ptr<TensorOperation> Parse() override;
  272. private:
  273. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  274. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  275. };
  276. /// \brief RandomCrop TensorTransform.
  277. /// \notes Crop the input image at a random location.
  278. class RandomCrop : public TensorTransform {
  279. public:
  280. /// \brief Constructor.
  281. /// \param[in] size A vector representing the output size of the cropped image.
  282. /// If size is a single value, a square crop of size (size, size) is returned.
  283. /// If size has 2 values, it should be (height, width).
  284. /// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
  285. /// If vector has one value, it pads all sides of the image with that value.
  286. /// If vector has two values, it pads left and top with the first and
  287. /// right and bottom with the second value.
  288. /// If vector has four values, it pads left, top, right, and bottom with
  289. /// those values respectively.
  290. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  291. /// the given output size (default=false).
  292. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is BorderType.kConstant (default={0, 0, 0}).
  293. /// If 1 value is provided, it is used for all RGB channels. If 3 values are provided, they are used to fill R, G, B channels respectively.
  294. /// \param[in] padding_mode The method of padding (default=BorderType::kConstant).
  295. explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  296. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  297. BorderType padding_mode = BorderType::kConstant);
  298. /// \brief Destructor.
  299. ~RandomCrop() = default;
  300. protected:
  301. /// \brief Convert this TensorTransform object into a TensorOperation object.
  302. /// \return Shared pointer to the resulting TensorOperation object.
  303. std::shared_ptr<TensorOperation> Parse() override;
  304. private:
  305. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  306. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  307. };
  308. /// \brief RandomCropDecodeResize TensorTransform.
  309. /// \notes Equivalent to RandomResizedCrop, but crops before decoding.
  310. class RandomCropDecodeResize : public TensorTransform {
  311. public:
  312. /// \brief Constructor.
  313. /// \param[in] size A vector representing the output size of the cropped image.
  314. /// If size is a single value, a square crop of size (size, size) is returned.
  315. /// If size has 2 values, it should be (height, width).
  316. /// \param[in] scale Range [min, max) of respective size of the
  317. /// original size to be cropped (default=(0.08, 1.0)).
  318. /// \param[in] ratio Range [min, max) of aspect ratio to be
  319. /// cropped (default=(3. / 4., 4. / 3.)).
  320. /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
  321. /// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10).
  322. /// If exceeded, fall back to use center_crop instead.
  323. explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  324. std::vector<float> ratio = {3. / 4, 4. / 3},
  325. InterpolationMode interpolation = InterpolationMode::kLinear,
  326. int32_t max_attempts = 10);
  327. /// \brief Destructor.
  328. ~RandomCropDecodeResize() = default;
  329. protected:
  330. /// \brief Convert this TensorTransform object into a TensorOperation object.
  331. /// \return Shared pointer to the resulting TensorOperation object.
  332. std::shared_ptr<TensorOperation> Parse() override;
  333. private:
  334. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  335. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  336. };
  337. /// \brief RandomCropWithBBox TensorTransform.
  338. /// \notes Crop the input image at a random location and adjust bounding boxes accordingly.
  339. class RandomCropWithBBox : public TensorTransform {
  340. public:
  341. /// \brief Constructor.
  342. /// \param[in] size A vector representing the output size of the cropped image.
  343. /// If size is a single value, a square crop of size (size, size) is returned.
  344. /// If size has 2 values, it should be (height, width).
  345. /// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
  346. /// If vector has one value, it pads all sides of the image with that value.
  347. /// If vector has two values, it pads left and top with the first and
  348. /// right and bottom with the second value.
  349. /// If vector has four values, it pads left, top, right, and bottom with
  350. /// those values respectively.
  351. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  352. /// the given output size (default=false).
  353. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  354. /// BorderType.kConstant (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
  355. /// If 3 values are provided, they are used to fill R, G, B channels respectively.
  356. /// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
  357. /// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
  358. explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  359. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  360. BorderType padding_mode = BorderType::kConstant);
  361. /// \brief Destructor.
  362. ~RandomCropWithBBox() = default;
  363. protected:
  364. /// \brief Convert this TensorTransform object into a TensorOperation object.
  365. /// \return Shared pointer to the resulting TensorOperation object.
  366. std::shared_ptr<TensorOperation> Parse() override;
  367. private:
  368. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  369. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  370. };
  371. /// \brief RandomHorizontalFlip TensorTransform.
  372. /// \notes Tensor operation to perform a random horizontal flip.
  373. class RandomHorizontalFlip : public TensorTransform {
  374. public:
  375. /// \brief Constructor.
  376. /// \param[in] prob A float representing the probability of flip (default=0.5).
  377. explicit RandomHorizontalFlip(float prob = 0.5);
  378. /// \brief Destructor.
  379. ~RandomHorizontalFlip() = default;
  380. protected:
  381. /// \brief Convert this TensorTransform object into a TensorOperation object.
  382. /// \return Shared pointer to the resulting TensorOperation object.
  383. std::shared_ptr<TensorOperation> Parse() override;
  384. private:
  385. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  386. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  387. };
  388. /// \brief RandomHorizontalFlipWithBBox TensorTransform.
  389. /// \notes Flip the input image horizontally at random with a given probability, and adjust bounding boxes accordingly.
  390. class RandomHorizontalFlipWithBBox : public TensorTransform {
  391. public:
  392. /// \brief Constructor.
  393. /// \param[in] prob A float representing the probability of flip (default=0.5).
  394. explicit RandomHorizontalFlipWithBBox(float prob = 0.5);
  395. /// \brief Destructor.
  396. ~RandomHorizontalFlipWithBBox() = default;
  397. protected:
  398. /// \brief Convert this TensorTransform object into a TensorOperation object.
  399. /// \return Shared pointer to the resulting TensorOperation object.
  400. std::shared_ptr<TensorOperation> Parse() override;
  401. private:
  402. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  403. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  404. };
  405. /// \brief RandomPosterize TensorTransform.
  406. /// \notes Tensor operation to perform random posterize.
  407. class RandomPosterize : public TensorTransform {
  408. public:
  409. /// \brief Constructor.
  410. /// \param[in] bit_range A uint8_t vector representing the minimum and maximum bit in range (default={4, 8}).
  411. explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
  412. /// \brief Destructor.
  413. ~RandomPosterize() = default;
  414. protected:
  415. /// \brief Convert this TensorTransform object into a TensorOperation object.
  416. /// \return Shared pointer to the resulting TensorOperation object.
  417. std::shared_ptr<TensorOperation> Parse() override;
  418. private:
  419. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  420. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  421. };
  422. /// \brief RandomResize TensorTransform.
  423. /// \notes Resize the input image using a randomly selected interpolation mode.
  424. /// The output size is given by the constructor's size parameter.
  425. class RandomResize : public TensorTransform {
  426. public:
  427. /// \brief Constructor.
  428. /// \param[in] size A vector representing the output size of the resized image.
  429. /// If size is a single value, the smaller edge of the image will be resized to this value with the same image aspect ratio. If size has 2 values, it should be (height, width).
  430. explicit RandomResize(std::vector<int32_t> size);
  431. /// \brief Destructor.
  432. ~RandomResize() = default;
  433. protected:
  434. /// \brief Convert this TensorTransform object into a TensorOperation object.
  435. /// \return Shared pointer to the resulting TensorOperation object.
  436. std::shared_ptr<TensorOperation> Parse() override;
  437. private:
  438. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  439. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  440. };
  441. /// \brief RandomResizeWithBBox TensorTransform.
  442. /// \notes Resize the input image using a randomly selected interpolation mode and adjust
  443. /// bounding boxes accordingly.
  444. class RandomResizeWithBBox : public TensorTransform {
  445. public:
  446. /// \brief Constructor.
  447. /// \param[in] size A vector representing the output size of the resized image.
  448. /// If size is a single value, the smaller edge of the image will be resized to this value with
  449. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  450. explicit RandomResizeWithBBox(std::vector<int32_t> size);
  451. /// \brief Destructor.
  452. ~RandomResizeWithBBox() = default;
  453. protected:
  454. /// \brief Convert this TensorTransform object into a TensorOperation object.
  455. /// \return Shared pointer to the resulting TensorOperation object.
  456. std::shared_ptr<TensorOperation> Parse() override;
  457. private:
  458. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  459. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  460. };
  461. /// \brief RandomResizedCrop TensorTransform.
  462. /// \notes Crop the input image to a random size and aspect ratio.
  463. class RandomResizedCrop : public TensorTransform {
  464. public:
  465. /// \brief Constructor.
  466. /// \param[in] size A vector representing the output size of the cropped image.
  467. /// If size is a single value, a square crop of size (size, size) is returned.
  468. /// If size has 2 values, it should be (height, width).
  469. /// \param[in] scale Range [min, max) of respective size of the original
  470. /// size to be cropped (default=(0.08, 1.0)).
  471. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  472. /// (default=(3. / 4., 4. / 3.)).
  473. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
  474. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  475. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  476. explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  477. std::vector<float> ratio = {3. / 4., 4. / 3.},
  478. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  479. /// \brief Destructor.
  480. ~RandomResizedCrop() = default;
  481. protected:
  482. /// \brief Convert this TensorTransform object into a TensorOperation object.
  483. /// \return Shared pointer to the resulting TensorOperation object.
  484. std::shared_ptr<TensorOperation> Parse() override;
  485. private:
  486. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  487. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  488. };
  489. /// \brief RandomResizedCropWithBBox TensorTransform.
  490. /// \notes Crop the input image to a random size and aspect ratio, and adjust bounding boxes accordingly.
  491. class RandomResizedCropWithBBox : public TensorTransform {
  492. public:
  493. /// \brief Constructor. Marked explicit, like the other transforms, so a size vector is never implicitly converted.
  494. /// \param[in] size A vector representing the output size of the cropped image.
  495. /// If size is a single value, a square crop of size (size, size) is returned.
  496. /// If size has 2 values, it should be (height, width).
  497. /// \param[in] scale Range [min, max) of respective size of the original
  498. /// size to be cropped (default=(0.08, 1.0)).
  499. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  500. /// (default=(3. / 4., 4. / 3.)).
  501. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
  502. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  503. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  504. explicit RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  505. std::vector<float> ratio = {3. / 4., 4. / 3.},
  506. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  507. /// \brief Destructor.
  508. ~RandomResizedCropWithBBox() = default;
  509. protected:
  510. /// \brief Convert this TensorTransform object into a TensorOperation object.
  511. /// \return Shared pointer to the resulting TensorOperation object.
  512. std::shared_ptr<TensorOperation> Parse() override;
  513. private:
  514. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  515. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  516. };
  517. /// \brief RandomRotation TensorTransform.
  518. /// \notes Rotate the image according to the given parameters.
  519. class RandomRotation : public TensorTransform {
  520. public:
  521. /// \brief Constructor. Marked explicit, like the other transforms, so a degrees vector is never implicitly converted.
  522. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree.
  523. /// \param[in] resample An enum for the mode of interpolation (default=InterpolationMode::kNearestNeighbour).
  524. /// \param[in] expand A boolean representing whether the image is expanded after rotation (default=false).
  525. /// \param[in] center A float vector of size 2, representing the x and y center of rotation (default={-1, -1}).
  526. /// \param[in] fill_value A vector representing the value to fill the area outside the transform
  527. /// in the output image (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
  528. /// If 3 values are provided, they are used to fill R, G, B channels respectively.
  529. explicit RandomRotation(std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour,
  530. bool expand = false, std::vector<float> center = {-1, -1},
  531. std::vector<uint8_t> fill_value = {0, 0, 0});
  532. /// \brief Destructor.
  533. ~RandomRotation() = default;
  534. protected:
  535. /// \brief Convert this TensorTransform object into a TensorOperation object.
  536. /// \return Shared pointer to the resulting TensorOperation object.
  537. std::shared_ptr<TensorOperation> Parse() override;
  538. private:
  539. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  540. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  541. };
  542. /// \brief RandomSelectSubpolicy TensorTransform.
  543. /// \notes Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
  544. /// (op, prob), where op is a TensorTransform operation and prob is the probability that this op will be applied.
  545. /// Once a sub-policy is selected, each op within the sub-policy will be applied in sequence according to its
  546. /// probability.
  547. class RandomSelectSubpolicy : public TensorTransform {
  548. public:
  549. /// \brief Constructor.
  550. /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers.
  551. explicit RandomSelectSubpolicy(std::vector<std::vector<std::pair<TensorTransform *, double>>> policy);
  552. /// \brief Constructor.
  553. /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers.
  554. explicit RandomSelectSubpolicy(std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> policy);
  555. /// \brief Constructor.
  556. /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are reference wrappers.
  557. explicit RandomSelectSubpolicy(
  558. std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> policy);
  559. /// \brief Destructor.
  560. ~RandomSelectSubpolicy() = default;
  561. protected:
  562. /// \brief Convert this TensorTransform object into a TensorOperation object.
  563. /// \return Shared pointer to the resulting TensorOperation object.
  564. std::shared_ptr<TensorOperation> Parse() override;
  565. private:
  566. struct Data;  // Forward declaration; the definition is not part of this class declaration.
  567. std::shared_ptr<Data> data_;  // Shared pointer to the forward-declared Data.
  568. };
  569. /// \brief RandomSharpness TensorTransform.
  570. /// \notes Tensor operation to perform random sharpness adjustment.
  571. class RandomSharpness : public TensorTransform {
  572. public:
  573. /// \brief Constructor.
  574. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly
  575. /// sample from, to select a degree to adjust sharpness (default={0.1, 1.9}).
  576. explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
  577. /// \brief Destructor.
  578. ~RandomSharpness() = default;
  579. protected:
  580. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  581. /// \return Shared pointer to the TensorOperation object.
  582. std::shared_ptr<TensorOperation> Parse() override;
  583. private:
  584. struct Data;
  585. std::shared_ptr<Data> data_;
  586. };
  587. /// \brief RandomSolarize TensorTransform.
  588. /// \notes Invert pixels randomly within the specified range. If min=max, it is a single fixed magnitude operation
  589. /// that inverts all pixels above that threshold.
  590. class RandomSolarize : public TensorTransform {
  591. public:
  592. /// \brief Constructor.
  593. /// \param[in] threshold A vector with two elements specifying the pixel range to invert (default={0, 255}).
  594. explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
  595. /// \brief Destructor.
  596. ~RandomSolarize() = default;
  597. protected:
  598. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  599. /// \return Shared pointer to the TensorOperation object.
  600. std::shared_ptr<TensorOperation> Parse() override;
  601. private:
  602. struct Data;
  603. std::shared_ptr<Data> data_;
  604. };
  605. /// \brief RandomVerticalFlip TensorTransform.
  606. /// \notes Tensor operation to perform a random vertical flip.
  607. class RandomVerticalFlip : public TensorTransform {
  608. public:
  609. /// \brief Constructor.
  610. /// \param[in] prob A float representing the probability of flip (default=0.5).
  611. explicit RandomVerticalFlip(float prob = 0.5);
  612. /// \brief Destructor.
  613. ~RandomVerticalFlip() = default;
  614. protected:
  615. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  616. /// \return Shared pointer to the TensorOperation object.
  617. std::shared_ptr<TensorOperation> Parse() override;
  618. private:
  619. struct Data;
  620. std::shared_ptr<Data> data_;
  621. };
  622. /// \brief RandomVerticalFlipWithBBox TensorTransform.
  623. /// \notes Flip the input image vertically, randomly with a given probability, and adjust bounding boxes accordingly.
  624. class RandomVerticalFlipWithBBox : public TensorTransform {
  625. public:
  626. /// \brief Constructor.
  627. /// \param[in] prob A float representing the probability of flip (default=0.5).
  628. explicit RandomVerticalFlipWithBBox(float prob = 0.5);
  629. /// \brief Destructor.
  630. ~RandomVerticalFlipWithBBox() = default;
  631. protected:
  632. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  633. /// \return Shared pointer to the TensorOperation object.
  634. std::shared_ptr<TensorOperation> Parse() override;
  635. private:
  636. struct Data;
  637. std::shared_ptr<Data> data_;
  638. };
  639. /// \brief Rescale TensorTransform.
  640. /// \notes Tensor operation to rescale the input image.
  641. class Rescale : public TensorTransform {
  642. public:
  643. /// \brief Constructor.
  644. /// \param[in] rescale Rescale factor (no default; must be supplied).
  645. /// \param[in] shift Shift factor (no default; must be supplied).
  646. Rescale(float rescale, float shift);
  647. /// \brief Destructor.
  648. ~Rescale() = default;
  649. protected:
  650. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  651. /// \return Shared pointer to the TensorOperation object.
  652. std::shared_ptr<TensorOperation> Parse() override;
  653. private:
  654. struct Data;
  655. std::shared_ptr<Data> data_;
  656. };
  657. /// \brief ResizeWithBBox TensorTransform.
  658. /// \notes Resize the input image to the given size and adjust bounding boxes accordingly.
  659. class ResizeWithBBox : public TensorTransform {
  660. public:
  661. /// \brief Constructor.
  662. /// \param[in] size A vector representing the output size of the resized image.
  663. /// If size is a single value, the smaller edge of the image will be resized to this value with the same image
  664. /// aspect ratio. If size has 2 values, it should be (height, width).
  665. /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
  666. explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
  667. /// \brief Destructor.
  668. ~ResizeWithBBox() = default;
  669. protected:
  670. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  671. /// \return Shared pointer to the TensorOperation object.
  672. std::shared_ptr<TensorOperation> Parse() override;
  673. private:
  674. struct Data;
  675. std::shared_ptr<Data> data_;
  676. };
  677. /// \brief RGBA2BGR TensorTransform.
  678. /// \notes Changes the input 4-channel RGBA tensor to a 3-channel BGR tensor.
  679. class RGBA2BGR : public TensorTransform {
  680. public:
  681. /// \brief Constructor (takes no parameters).
  682. RGBA2BGR();
  683. /// \brief Destructor.
  684. ~RGBA2BGR() = default;
  685. protected:
  686. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  687. /// \return Shared pointer to the TensorOperation object.
  688. std::shared_ptr<TensorOperation> Parse() override;
  689. };
  690. /// \brief RGBA2RGB TensorTransform.
  691. /// \notes Changes the input 4-channel RGBA tensor to a 3-channel RGB tensor.
  692. class RGBA2RGB : public TensorTransform {
  693. public:
  694. /// \brief Constructor (takes no parameters).
  695. RGBA2RGB();
  696. /// \brief Destructor.
  697. ~RGBA2RGB() = default;
  698. protected:
  699. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  700. /// \return Shared pointer to the TensorOperation object.
  701. std::shared_ptr<TensorOperation> Parse() override;
  702. };
  703. /// \brief SoftDvppDecodeRandomCropResizeJpeg TensorTransform.
  704. /// \notes Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of
  705. /// Ascend series chip DVPP module. The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
  706. /// The input image size should be in range [32*32, 8192*8192].
  707. /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
  708. /// Only images with an even resolution can be output. The output of odd resolution is not supported.
  709. class SoftDvppDecodeRandomCropResizeJpeg : public TensorTransform {
  710. public:
  711. /// \brief Constructor.
  712. /// \param[in] size A vector representing the output size of the resized image. If size is a single value,
  713. /// the smaller edge is resized to this value, keeping the aspect ratio; if it has 2 values, it should be (height, width).
  714. /// \param[in] scale,ratio,max_attempts Optional (defaults {0.08, 1.0}, {3. / 4., 4. / 3.}, 10); presumably crop-area range, aspect-ratio range and retry limit - TODO confirm.
  715. SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  716. std::vector<float> ratio = {3. / 4., 4. / 3.}, int32_t max_attempts = 10);
  717. /// \brief Destructor.
  718. ~SoftDvppDecodeRandomCropResizeJpeg() = default;
  719. protected:
  720. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  721. /// \return Shared pointer to the TensorOperation object.
  722. std::shared_ptr<TensorOperation> Parse() override;
  723. private:
  724. struct Data;
  725. std::shared_ptr<Data> data_;
  726. };
  727. /// \brief SoftDvppDecodeResizeJpeg TensorTransform.
  728. /// \notes Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series
  729. /// chip DVPP module. It is recommended to use this algorithm in the following scenario:
  730. /// when training, the DVPP of the Ascend chip is not used,
  731. /// and the DVPP of the Ascend chip is used during inference,
  732. /// and the accuracy of inference is lower than the accuracy of training.
  733. /// The input image size should be in range [32*32, 8192*8192].
  734. /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
  735. /// Only images with an even resolution can be output. The output of odd resolution is not supported.
  736. class SoftDvppDecodeResizeJpeg : public TensorTransform {
  737. public:
  738. /// \brief Constructor.
  739. /// \param[in] size A vector representing the output size of the resized image.
  740. /// If size is a single value, the smaller edge of the image will be resized to this value with
  741. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  742. explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);
  743. /// \brief Destructor.
  744. ~SoftDvppDecodeResizeJpeg() = default;
  745. protected:
  746. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  747. /// \return Shared pointer to the TensorOperation object.
  748. std::shared_ptr<TensorOperation> Parse() override;
  749. private:
  750. struct Data;
  751. std::shared_ptr<Data> data_;
  752. };
  753. /// \brief SwapRedBlue TensorTransform.
  754. /// \notes Swaps the red and blue channels in the image.
  755. class SwapRedBlue : public TensorTransform {
  756. public:
  757. /// \brief Constructor (takes no parameters).
  758. SwapRedBlue();
  759. /// \brief Destructor.
  760. ~SwapRedBlue() = default;
  761. protected:
  762. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  763. /// \return Shared pointer to the TensorOperation object.
  764. std::shared_ptr<TensorOperation> Parse() override;
  765. };
  766. /// \brief UniformAugment TensorTransform.
  767. /// \notes Tensor operation to perform randomly selected augmentation.
  768. class UniformAugment : public TensorTransform {
  769. public:
  770. /// \brief Constructor.
  771. /// \param[in] transforms Vector of raw pointers to TensorTransform operations.
  772. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
  773. explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);
  774. /// \brief Constructor.
  775. /// \param[in] transforms Vector of shared pointers to TensorTransform operations.
  776. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
  777. explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);
  778. /// \brief Constructor.
  779. /// \param[in] transforms Vector of references (std::reference_wrapper) to TensorTransform operations.
  780. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
  781. explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);
  782. /// \brief Destructor.
  783. ~UniformAugment() = default;
  784. protected:
  785. /// \brief Function to convert this TensorTransform object into a TensorOperation object.
  786. /// \return Shared pointer to the TensorOperation object.
  787. std::shared_ptr<TensorOperation> Parse() override;
  788. private:
  789. struct Data;
  790. std::shared_ptr<Data> data_;
  791. };
  792. } // namespace vision
  793. } // namespace dataset
  794. } // namespace mindspore
  795. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_