You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

vision.h 40 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_
  18. #include <map>
  19. #include <memory>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "include/api/dual_abi_helper.h"
  24. #include "include/api/status.h"
  25. #include "minddata/dataset/include/constants.h"
  26. #include "minddata/dataset/include/transforms.h"
  27. #include "minddata/dataset/include/vision_lite.h"
  28. namespace mindspore {
  29. namespace dataset {
  30. class TensorOperation;
  31. // Transform operations for performing computer vision.
  32. namespace vision {
  33. /// \brief AutoContrast TensorTransform.
  34. /// \notes Apply automatic contrast on input image.
  35. class AutoContrast final : public TensorTransform {
  36. public:
  37. /// \brief Constructor.
  38. /// \param[in] cutoff Percent of pixels to cut off from the histogram, the valid range of cutoff value is 0 to 100.
  39. /// \param[in] ignore Pixel values to ignore.
  40. explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});
  41. /// \brief Destructor.
  42. ~AutoContrast() = default;
  43. protected:
  44. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  45. /// \return Shared pointer to TensorOperation object.
  46. std::shared_ptr<TensorOperation> Parse() override;
  47. private:
  48. struct Data;
  49. std::shared_ptr<Data> data_;
  50. };
  51. /// \brief BoundingBoxAugment TensorTransform.
  52. /// \notes Apply a given image transform on a random selection of bounding box regions of a given image.
  53. class BoundingBoxAugment final : public TensorTransform {
  54. public:
  55. /// \brief Constructor.
  56. /// \param[in] transform Raw pointer to a TensorTransform operation.
  57. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  58. explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);
  59. /// \brief Constructor.
  60. /// \param[in] transform Smart pointer to a TensorTransform operation.
  61. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  62. explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);
  63. /// \brief Constructor.
  64. /// \param[in] transform Object pointer to a TensorTransform operation.
  65. /// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
  66. explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3);
  67. /// \brief Destructor.
  68. ~BoundingBoxAugment() = default;
  69. protected:
  70. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  71. /// \return Shared pointer to TensorOperation object.
  72. std::shared_ptr<TensorOperation> Parse() override;
  73. private:
  74. struct Data;
  75. std::shared_ptr<Data> data_;
  76. };
  77. /// \brief Constructor to apply CutMix on a batch of images
  78. /// \notes Masks a random section of each image with the corresponding part of another randomly
  79. /// selected image in that batch
  80. class CutMixBatch final : public TensorTransform {
  81. public:
  82. /// \brief Constructor.
  83. /// \param[in] image_batch_format The format of the batch
  84. /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0)
  85. /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0)
  86. explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
  87. /// \brief Destructor.
  88. ~CutMixBatch() = default;
  89. protected:
  90. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  91. /// \return Shared pointer to TensorOperation object.
  92. std::shared_ptr<TensorOperation> Parse() override;
  93. private:
  94. struct Data;
  95. std::shared_ptr<Data> data_;
  96. };
  97. /// \brief CutOut TensorOp
  98. /// \notes Randomly cut (mask) out a given number of square patches from the input image
  99. class CutOut final : public TensorTransform {
  100. public:
  101. /// \brief Constructor.
  102. /// \param[in] length Integer representing the side length of each square patch
  103. /// \param[in] num_patches Integer representing the number of patches to be cut out of an image
  104. explicit CutOut(int32_t length, int32_t num_patches = 1);
  105. /// \brief Destructor.
  106. ~CutOut() = default;
  107. protected:
  108. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  109. /// \return Shared pointer to TensorOperation object.
  110. std::shared_ptr<TensorOperation> Parse() override;
  111. private:
  112. struct Data;
  113. std::shared_ptr<Data> data_;
  114. };
  115. /// \brief Equalize TensorTransform.
  116. /// \notes Apply histogram equalization on input image.
  117. class Equalize final : public TensorTransform {
  118. public:
  119. /// \brief Constructor.
  120. Equalize();
  121. /// \brief Destructor.
  122. ~Equalize() = default;
  123. protected:
  124. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  125. /// \return Shared pointer to TensorOperation object.
  126. std::shared_ptr<TensorOperation> Parse() override;
  127. };
  128. /// \brief HwcToChw TensorTransform.
  129. /// \notes Transpose the input image; shape (H, W, C) to shape (C, H, W).
  130. class HWC2CHW final : public TensorTransform {
  131. public:
  132. /// \brief Constructor.
  133. HWC2CHW();
  134. /// \brief Destructor.
  135. ~HWC2CHW() = default;
  136. protected:
  137. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  138. /// \return Shared pointer to TensorOperation object.
  139. std::shared_ptr<TensorOperation> Parse() override;
  140. };
  141. /// \brief Invert TensorTransform.
  142. /// \notes Apply invert on input image in RGB mode.
  143. class Invert final : public TensorTransform {
  144. public:
  145. /// \brief Constructor.
  146. Invert();
  147. /// \brief Destructor.
  148. ~Invert() = default;
  149. protected:
  150. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  151. /// \return Shared pointer to TensorOperation object.
  152. std::shared_ptr<TensorOperation> Parse() override;
  153. };
  154. /// \brief MixUpBatch TensorTransform.
  155. /// \notes Apply MixUp transformation on an input batch of images and labels. The labels must be in
  156. /// one-hot format and Batch must be called before calling this function.
  157. class MixUpBatch final : public TensorTransform {
  158. public:
  159. /// \brief Constructor.
  160. /// \param[in] alpha hyperparameter of beta distribution (default = 1.0)
  161. explicit MixUpBatch(float alpha = 1);
  162. /// \brief Destructor.
  163. ~MixUpBatch() = default;
  164. protected:
  165. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  166. /// \return Shared pointer to TensorOperation object.
  167. std::shared_ptr<TensorOperation> Parse() override;
  168. private:
  169. struct Data;
  170. std::shared_ptr<Data> data_;
  171. };
  172. /// \brief NormalizePad TensorTransform.
  173. /// \notes Normalize the input image with respect to mean and standard deviation and pad an extra
  174. /// channel with value zero.
  175. class NormalizePad final : public TensorTransform {
  176. public:
  177. /// \brief Constructor.
  178. /// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
  179. /// The mean values must be in range [0.0, 255.0].
  180. /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
  181. /// The standard deviation values must be in range (0.0, 255.0]
  182. /// \param[in] dtype The output datatype of Tensor.
  183. /// The standard deviation values must be "float32" or "float16"(default = "float32")
  184. explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std,
  185. const std::string &dtype = "float32")
  186. : NormalizePad(mean, std, StringToChar(dtype)) {}
  187. explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype);
  188. /// \brief Destructor.
  189. ~NormalizePad() = default;
  190. protected:
  191. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  192. /// \return Shared pointer to TensorOperation object.
  193. std::shared_ptr<TensorOperation> Parse() override;
  194. private:
  195. struct Data;
  196. std::shared_ptr<Data> data_;
  197. };
  198. /// \brief Pad TensorOp
  199. /// \notes Pads the image according to padding parameters
  200. class Pad final : public TensorTransform {
  201. public:
  202. /// \brief Constructor.
  203. /// \param[in] padding A vector representing the number of pixels to pad the image
  204. /// If vector has one value, it pads all sides of the image with that value.
  205. /// If vector has two values, it pads left and top with the first and
  206. /// right and bottom with the second value.
  207. /// If vector has four values, it pads left, top, right, and bottom with
  208. /// those values respectively.
  209. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  210. /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels. If 3 values are provided,
  211. /// it is used to fill R, G, B channels respectively.
  212. /// \param[in] padding_mode The method of padding (default=BorderType.kConstant)
  213. /// Can be any of
  214. /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric]
  215. /// - BorderType.kConstant, means it fills the border with constant values
  216. /// - BorderType.kEdge, means it pads with the last value on the edge
  217. /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge
  218. /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge
  219. explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
  220. BorderType padding_mode = BorderType::kConstant);
  221. /// \brief Destructor.
  222. ~Pad() = default;
  223. protected:
  224. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  225. /// \return Shared pointer to TensorOperation object.
  226. std::shared_ptr<TensorOperation> Parse() override;
  227. private:
  228. struct Data;
  229. std::shared_ptr<Data> data_;
  230. };
  231. /// \brief Blends an image with its grayscale version with random weights
  232. /// t and 1 - t generated from a given range. If the range is trivial
  233. /// then the weights are determinate and t equals the bound of the interval
  234. class RandomColor final : public TensorTransform {
  235. public:
  236. /// \brief Constructor.
  237. /// \param[in] t_lb Lower bound on the range of random weights
  238. /// \param[in] t_lb Upper bound on the range of random weights
  239. explicit RandomColor(float t_lb, float t_ub);
  240. /// \brief Destructor.
  241. ~RandomColor() = default;
  242. protected:
  243. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  244. /// \return Shared pointer to TensorOperation object.
  245. std::shared_ptr<TensorOperation> Parse() override;
  246. private:
  247. struct Data;
  248. std::shared_ptr<Data> data_;
  249. };
  250. /// \brief RandomColorAdjust TensorTransform.
  251. /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image
  252. class RandomColorAdjust final : public TensorTransform {
  253. public:
  254. /// \brief Constructor.
  255. /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values
  256. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  257. /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values
  258. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  259. /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values
  260. /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
  261. /// \param[in] hue Brightness adjustment factor. Must be a vector of one or two values
  262. /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
  263. /// Default value is {0, 0}
  264. explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
  265. std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
  266. /// \brief Destructor.
  267. ~RandomColorAdjust() = default;
  268. protected:
  269. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  270. /// \return Shared pointer to TensorOperation object.
  271. std::shared_ptr<TensorOperation> Parse() override;
  272. private:
  273. struct Data;
  274. std::shared_ptr<Data> data_;
  275. };
  276. /// \brief RandomCrop TensorTransform.
  277. /// \notes Crop the input image at a random location.
  278. class RandomCrop final : public TensorTransform {
  279. public:
  280. /// \brief Constructor.
  281. /// \param[in] size A vector representing the output size of the cropped image.
  282. /// If size is a single value, a square crop of size (size, size) is returned.
  283. /// If size has 2 values, it should be (height, width).
  284. /// \param[in] padding A vector representing the number of pixels to pad the image
  285. /// If vector has one value, it pads all sides of the image with that value.
  286. /// If vector has two values, it pads left and top with the first and
  287. /// right and bottom with the second value.
  288. /// If vector has four values, it pads left, top, right, and bottom with
  289. /// those values respectively.
  290. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  291. /// the given output size.
  292. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  293. /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels.
  294. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  295. explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  296. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  297. BorderType padding_mode = BorderType::kConstant);
  298. /// \brief Destructor.
  299. ~RandomCrop() = default;
  300. protected:
  301. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  302. /// \return Shared pointer to TensorOperation object.
  303. std::shared_ptr<TensorOperation> Parse() override;
  304. private:
  305. struct Data;
  306. std::shared_ptr<Data> data_;
  307. };
  308. /// \brief RandomCropDecodeResize TensorTransform.
  309. /// \notes Equivalent to RandomResizedCrop, but crops before decodes.
  310. class RandomCropDecodeResize final : public TensorTransform {
  311. public:
  312. /// \brief Constructor.
  313. /// \param[in] size A vector representing the output size of the cropped image.
  314. /// If size is a single value, a square crop of size (size, size) is returned.
  315. /// If size has 2 values, it should be (height, width).
  316. /// \param[in] scale Range [min, max) of respective size of the
  317. /// original size to be cropped (default=(0.08, 1.0))
  318. /// \param[in] ratio Range [min, max) of aspect ratio to be
  319. /// cropped (default=(3. / 4., 4. / 3.))
  320. /// \param[in] interpolation An enum for the mode of interpolation
  321. /// \param[in] The maximum number of attempts to propose a valid crop_area (default=10).
  322. /// If exceeded, fall back to use center_crop instead.
  323. explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  324. std::vector<float> ratio = {3. / 4, 4. / 3},
  325. InterpolationMode interpolation = InterpolationMode::kLinear,
  326. int32_t max_attempts = 10);
  327. /// \brief Destructor.
  328. ~RandomCropDecodeResize() = default;
  329. protected:
  330. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  331. /// \return Shared pointer to TensorOperation object.
  332. std::shared_ptr<TensorOperation> Parse() override;
  333. private:
  334. struct Data;
  335. std::shared_ptr<Data> data_;
  336. };
  337. /// \brief RandomCropWithBBox TensorTransform.
  338. /// \notes Crop the input image at a random location and adjust bounding boxes accordingly.
  339. /// If cropped area is out of bbox, the return bbox will be empty.
  340. class RandomCropWithBBox final : public TensorTransform {
  341. public:
  342. /// \brief Constructor.
  343. /// \param[in] size A vector representing the output size of the cropped image.
  344. /// If size is a single value, a square crop of size (size, size) is returned.
  345. /// If size has 2 values, it should be (height, width).
  346. /// \param[in] padding A vector representing the number of pixels to pad the image
  347. /// If vector has one value, it pads all sides of the image with that value.
  348. /// If vector has two values, it pads left and top with the first and
  349. /// right and bottom with the second value.
  350. /// If vector has four values, it pads left, top, right, and bottom with
  351. /// those values respectively.
  352. /// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
  353. /// the given output size.
  354. /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
  355. /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels.
  356. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  357. /// \param[in] padding_mode The method of padding (default=BorderType::kConstant).It can be any of
  358. /// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
  359. explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
  360. bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
  361. BorderType padding_mode = BorderType::kConstant);
  362. /// \brief Destructor.
  363. ~RandomCropWithBBox() = default;
  364. protected:
  365. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  366. /// \return Shared pointer to TensorOperation object.
  367. std::shared_ptr<TensorOperation> Parse() override;
  368. private:
  369. struct Data;
  370. std::shared_ptr<Data> data_;
  371. };
  372. /// \brief RandomHorizontalFlip TensorTransform.
  373. /// \notes Tensor operation to perform random horizontal flip.
  374. class RandomHorizontalFlip final : public TensorTransform {
  375. public:
  376. /// \brief Constructor.
  377. /// \param[in] prob A float representing the probability of flip.
  378. explicit RandomHorizontalFlip(float prob = 0.5);
  379. /// \brief Destructor.
  380. ~RandomHorizontalFlip() = default;
  381. protected:
  382. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  383. /// \return Shared pointer to TensorOperation object.
  384. std::shared_ptr<TensorOperation> Parse() override;
  385. private:
  386. struct Data;
  387. std::shared_ptr<Data> data_;
  388. };
  389. /// \brief RandomHorizontalFlipWithBBox TensorTransform.
  390. /// \notes Flip the input image horizontally, randomly with a given probability and adjust bounding boxes accordingly.
  391. class RandomHorizontalFlipWithBBox final : public TensorTransform {
  392. public:
  393. /// \brief Constructor.
  394. /// \param[in] prob A float representing the probability of flip.
  395. explicit RandomHorizontalFlipWithBBox(float prob = 0.5);
  396. /// \brief Destructor.
  397. ~RandomHorizontalFlipWithBBox() = default;
  398. protected:
  399. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  400. /// \return Shared pointer to TensorOperation object.
  401. std::shared_ptr<TensorOperation> Parse() override;
  402. private:
  403. struct Data;
  404. std::shared_ptr<Data> data_;
  405. };
  406. /// \brief RandomPosterize TensorTransform.
  407. /// \notes Tensor operation to perform random posterize.
  408. class RandomPosterize final : public TensorTransform {
  409. public:
  410. /// \brief Constructor.
  411. /// \param[in] bit_range - uint8_t vector representing the minimum and maximum bit in range. (Default={4, 8})
  412. explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
  413. /// \brief Destructor.
  414. ~RandomPosterize() = default;
  415. protected:
  416. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  417. /// \return Shared pointer to TensorOperation object.
  418. std::shared_ptr<TensorOperation> Parse() override;
  419. private:
  420. struct Data;
  421. std::shared_ptr<Data> data_;
  422. };
  423. /// \brief RandomResize TensorTransform.
  424. /// \notes Resize the input image using a randomly selected interpolation mode.
  425. // the same image aspect ratio. If size has 2 values, it should be (height, width).
  426. class RandomResize final : public TensorTransform {
  427. public:
  428. /// \brief Constructor.
  429. /// \param[in] size A vector representing the output size of the resized image.
  430. /// If size is a single value, the smaller edge of the image will be resized to this value with
  431. explicit RandomResize(std::vector<int32_t> size);
  432. /// \brief Destructor.
  433. ~RandomResize() = default;
  434. protected:
  435. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  436. /// \return Shared pointer to TensorOperation object.
  437. std::shared_ptr<TensorOperation> Parse() override;
  438. private:
  439. struct Data;
  440. std::shared_ptr<Data> data_;
  441. };
  442. /// \brief RandomResizeWithBBox TensorTransform.
  443. /// \notes Resize the input image using a randomly selected interpolation mode and adjust
  444. /// bounding boxes accordingly.
  445. class RandomResizeWithBBox final : public TensorTransform {
  446. public:
  447. /// \brief Constructor.
  448. /// \param[in] size A vector representing the output size of the resized image.
  449. /// If size is a single value, the smaller edge of the image will be resized to this value with
  450. // the same image aspect ratio. If size has 2 values, it should be (height, width).
  451. explicit RandomResizeWithBBox(std::vector<int32_t> size);
  452. /// \brief Destructor.
  453. ~RandomResizeWithBBox() = default;
  454. protected:
  455. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  456. /// \return Shared pointer to TensorOperation object.
  457. std::shared_ptr<TensorOperation> Parse() override;
  458. private:
  459. struct Data;
  460. std::shared_ptr<Data> data_;
  461. };
  462. /// \brief RandomResizedCrop TensorTransform.
  463. /// \notes Crop the input image to a random size and aspect ratio.
  464. class RandomResizedCrop final : public TensorTransform {
  465. public:
  466. /// \brief Constructor.
  467. /// \param[in] size A vector representing the output size of the cropped image.
  468. /// If size is a single value, a square crop of size (size, size) is returned.
  469. /// If size has 2 values, it should be (height, width).
  470. /// \param[in] scale Range [min, max) of respective size of the original
  471. /// size to be cropped (default=(0.08, 1.0))
  472. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  473. /// (default=(3. / 4., 4. / 3.)).
  474. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear)
  475. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  476. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  477. explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  478. std::vector<float> ratio = {3. / 4., 4. / 3.},
  479. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  480. /// \brief Destructor.
  481. ~RandomResizedCrop() = default;
  482. protected:
  483. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  484. /// \return Shared pointer to TensorOperation object.
  485. std::shared_ptr<TensorOperation> Parse() override;
  486. private:
  487. struct Data;
  488. std::shared_ptr<Data> data_;
  489. };
  490. /// \brief RandomResizedCropWithBBox TensorTransform.
  491. /// \notes Crop the input image to a random size and aspect ratio.
  492. /// If cropped area is out of bbox, the return bbox will be empty.
  493. class RandomResizedCropWithBBox final : public TensorTransform {
  494. public:
  495. /// \brief Constructor.
  496. /// \param[in] size A vector representing the output size of the cropped image.
  497. /// If size is a single value, a square crop of size (size, size) is returned.
  498. /// If size has 2 values, it should be (height, width).
  499. /// \param[in] scale Range [min, max) of respective size of the original
  500. /// size to be cropped (default=(0.08, 1.0))
  501. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  502. /// (default=(3. / 4., 4. / 3.)).
  503. /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear)
  504. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  505. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  506. RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  507. std::vector<float> ratio = {3. / 4., 4. / 3.},
  508. InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
  509. /// \brief Destructor.
  510. ~RandomResizedCropWithBBox() = default;
  511. protected:
  512. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  513. /// \return Shared pointer to TensorOperation object.
  514. std::shared_ptr<TensorOperation> Parse() override;
  515. private:
  516. struct Data;
  517. std::shared_ptr<Data> data_;
  518. };
  519. /// \brief RandomRotation TensorOp
  520. /// \notes Rotates the image according to parameters
  521. class RandomRotation final : public TensorTransform {
  522. public:
  523. /// \brief Constructor.
  524. /// \param[in] degrees A float vector of size, representing the starting and ending degree
  525. /// \param[in] resample An enum for the mode of interpolation
  526. /// \param[in] expand A boolean representing whether the image is expanded after rotation
  527. /// \param[in] center A float vector of size 2, representing the x and y center of rotation.
  528. /// \param[in] fill_value A vector representing the value to fill the area outside the transform
  529. /// in the output image. If 1 value is provided, it is used for all RGB channels.
  530. /// If 3 values are provided, it is used to fill R, G, B channels respectively.
  531. RandomRotation(std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour,
  532. bool expand = false, std::vector<float> center = {-1, -1},
  533. std::vector<uint8_t> fill_value = {0, 0, 0});
  534. /// \brief Destructor.
  535. ~RandomRotation() = default;
  536. protected:
  537. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  538. /// \return Shared pointer to TensorOperation object.
  539. std::shared_ptr<TensorOperation> Parse() override;
  540. private:
  541. struct Data;
  542. std::shared_ptr<Data> data_;
  543. };
  544. /// \brief RandomSelectSubpolicy TensorTransform.
  545. /// \notes Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
  546. /// (op, prob), where op is a TensorTransform operation and prob is the probability that this op will be applied.
  547. /// Once a sub-policy is selected, each op within the sub-policy will be applied in sequence according to its
  548. /// probability.
  549. class RandomSelectSubpolicy final : public TensorTransform {
  550. public:
  551. /// \brief Constructor.
  552. /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers.
  553. explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);
  554. /// \brief Constructor.
  555. /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers.
  556. explicit RandomSelectSubpolicy(
  557. const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);
  558. /// \brief Constructor.
  559. /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are reference wrappers.
  560. explicit RandomSelectSubpolicy(
  561. const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);
  562. /// \brief Destructor.
  563. ~RandomSelectSubpolicy() = default;
  564. protected:
  565. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  566. /// \return Shared pointer to TensorOperation object.
  567. std::shared_ptr<TensorOperation> Parse() override;
  568. private:
  569. struct Data;
  570. std::shared_ptr<Data> data_;
  571. };
  572. /// \brief RandomSharpness TensorTransform.
  573. /// \notes Tensor operation to perform random sharpness.
  574. class RandomSharpness final : public TensorTransform {
  575. public:
  576. /// \brief Constructor.
  577. /// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly
  578. /// sample from, to select a degree to adjust sharpness (default={0.1, 1.9}).
  579. explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
  580. /// \brief Destructor.
  581. ~RandomSharpness() = default;
  582. protected:
  583. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  584. /// \return Shared pointer to TensorOperation object.
  585. std::shared_ptr<TensorOperation> Parse() override;
  586. private:
  587. struct Data;
  588. std::shared_ptr<Data> data_;
  589. };
  590. /// \brief RandomSolarize TensorTransform.
  591. /// \notes Invert pixels randomly within specified range. If min=max, it is a single fixed magnitude operation
  592. /// that inverts all pixels above that threshold.
  593. class RandomSolarize final : public TensorTransform {
  594. public:
  595. /// \brief Constructor.
  596. /// \param[in] threshold A vector with two elements specifying the pixel range to invert (default={0, 255}).
  597. explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
  598. /// \brief Destructor.
  599. ~RandomSolarize() = default;
  600. protected:
  601. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  602. /// \return Shared pointer to TensorOperation object.
  603. std::shared_ptr<TensorOperation> Parse() override;
  604. private:
  605. struct Data;
  606. std::shared_ptr<Data> data_;
  607. };
  608. /// \brief RandomVerticalFlip TensorTransform.
  609. /// \notes Tensor operation to perform random vertical flip.
  610. class RandomVerticalFlip final : public TensorTransform {
  611. public:
  612. /// \brief Constructor.
  613. /// \param[in] prob A float representing the probability of flip (default=0.5).
  614. explicit RandomVerticalFlip(float prob = 0.5);
  615. /// \brief Destructor.
  616. ~RandomVerticalFlip() = default;
  617. protected:
  618. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  619. /// \return Shared pointer to TensorOperation object.
  620. std::shared_ptr<TensorOperation> Parse() override;
  621. private:
  622. struct Data;
  623. std::shared_ptr<Data> data_;
  624. };
  625. /// \brief RandomVerticalFlipWithBBox TensorTransform.
  626. /// \notes Flip the input image vertically, randomly with a given probability, and adjust bounding boxes accordingly.
  627. class RandomVerticalFlipWithBBox final : public TensorTransform {
  628. public:
  629. /// \brief Constructor.
  630. /// \param[in] prob A float representing the probability of flip (default=0.5).
  631. explicit RandomVerticalFlipWithBBox(float prob = 0.5);
  632. /// \brief Destructor.
  633. ~RandomVerticalFlipWithBBox() = default;
  634. protected:
  635. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  636. /// \return Shared pointer to TensorOperation object.
  637. std::shared_ptr<TensorOperation> Parse() override;
  638. private:
  639. struct Data;
  640. std::shared_ptr<Data> data_;
  641. };
  642. /// \brief Rescale TensorTransform.
  643. /// \notes Tensor operation to rescale the input image.
  644. class Rescale final : public TensorTransform {
  645. public:
  646. /// \brief Constructor.
  647. /// \param[in] rescale Rescale factor.
  648. /// \param[in] shift Shift factor.
  649. Rescale(float rescale, float shift);
  650. /// \brief Destructor.
  651. ~Rescale() = default;
  652. protected:
  653. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  654. /// \return Shared pointer to TensorOperation object.
  655. std::shared_ptr<TensorOperation> Parse() override;
  656. private:
  657. struct Data;
  658. std::shared_ptr<Data> data_;
  659. };
  660. /// \brief ResizeWithBBox TensorTransform.
  661. /// \notes Resize the input image to the given size and adjust bounding boxes accordingly.
  662. class ResizeWithBBox final : public TensorTransform {
  663. public:
  664. /// \brief Constructor.
  665. /// \param[in] size A vector representing the output size of the resized image.
  666. /// If size has a single value, the smaller edge of the image will be resized to this value with the same image
  667. /// aspect ratio. If size has 2 values, it should be (height, width).
  668. /// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
  669. explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
  670. /// \brief Destructor.
  671. ~ResizeWithBBox() = default;
  672. protected:
  673. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  674. /// \return Shared pointer to TensorOperation object.
  675. std::shared_ptr<TensorOperation> Parse() override;
  676. private:
  677. struct Data;
  678. std::shared_ptr<Data> data_;
  679. };
  680. /// \brief RGBA2BGR TensorTransform.
  681. /// \notes Changes the input 4-channel RGBA tensor to a 3-channel BGR tensor.
  682. class RGBA2BGR final : public TensorTransform {
  683. public:
  684. /// \brief Constructor (takes no parameters).
  685. RGBA2BGR();
  686. /// \brief Destructor.
  687. ~RGBA2BGR() = default;
  688. protected:
  689. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  690. /// \return Shared pointer to TensorOperation object.
  691. std::shared_ptr<TensorOperation> Parse() override;
  692. };
  693. /// \brief RGBA2RGB TensorTransform.
  694. /// \notes Changes the input 4-channel RGBA tensor to a 3-channel RGB tensor.
  695. class RGBA2RGB final : public TensorTransform {
  696. public:
  697. /// \brief Constructor (takes no parameters).
  698. RGBA2RGB();
  699. /// \brief Destructor.
  700. ~RGBA2RGB() = default;
  701. protected:
  702. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  703. /// \return Shared pointer to TensorOperation object.
  704. std::shared_ptr<TensorOperation> Parse() override;
  705. };
  706. /// \brief SoftDvppDecodeRandomCropResizeJpeg TensorTransform.
  707. /// \notes Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of
  708. /// Ascend series chip DVPP module. The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
  709. /// The input image size should be in range [32*32, 8192*8192].
  710. /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
  711. /// Only images with an even resolution can be output. The output of odd resolution is not supported.
  712. class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform {
  713. public:
  714. /// \brief Constructor.
  715. /// \param[in] size A vector representing the output size of the resized image.
  716. /// If size is a single value, the smaller edge of the image will be resized to this value with
  717. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  718. /// \param[in] scale Range [min, max) of the size to be cropped, relative to the original
  719. /// size (default=(0.08, 1.0)).
  720. /// \param[in] ratio Range [min, max) of aspect ratio to be cropped
  721. /// (default=(3. / 4., 4. / 3.)).
  722. /// \param[in] max_attempts The maximum number of attempts to propose a valid
  723. /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
  724. SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
  725. std::vector<float> ratio = {3. / 4., 4. / 3.}, int32_t max_attempts = 10);
  726. /// \brief Destructor.
  727. ~SoftDvppDecodeRandomCropResizeJpeg() = default;
  728. protected:
  729. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  730. /// \return Shared pointer to TensorOperation object.
  731. std::shared_ptr<TensorOperation> Parse() override;
  732. private:
  733. struct Data;
  734. std::shared_ptr<Data> data_;
  735. };
  736. /// \brief SoftDvppDecodeResizeJpeg TensorTransform.
  737. /// \notes Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series
  738. /// chip DVPP module. It is recommended to use this algorithm in the following scenario:
  739. /// when training, the DVPP of the Ascend chip is not used,
  740. /// and the DVPP of the Ascend chip is used during inference,
  741. /// and the accuracy of inference is lower than the accuracy of training.
  742. /// The input image size should be in range [32*32, 8192*8192].
  743. /// The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
  744. /// Only images with an even resolution can be output. The output of odd resolution is not supported.
  745. class SoftDvppDecodeResizeJpeg final : public TensorTransform {
  746. public:
  747. /// \brief Constructor.
  748. /// \param[in] size A vector representing the output size of the resized image.
  749. /// If size is a single value, the smaller edge of the image will be resized to this value with
  750. /// the same image aspect ratio. If size has 2 values, it should be (height, width).
  751. explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);
  752. /// \brief Destructor.
  753. ~SoftDvppDecodeResizeJpeg() = default;
  754. protected:
  755. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  756. /// \return Shared pointer to TensorOperation object.
  757. std::shared_ptr<TensorOperation> Parse() override;
  758. private:
  759. struct Data;
  760. std::shared_ptr<Data> data_;
  761. };
  762. /// \brief SwapRedBlue TensorTransform.
  763. /// \notes Swaps the red and blue channels in the input image.
  764. class SwapRedBlue final : public TensorTransform {
  765. public:
  766. /// \brief Constructor (takes no parameters).
  767. SwapRedBlue();
  768. /// \brief Destructor.
  769. ~SwapRedBlue() = default;
  770. protected:
  771. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  772. /// \return Shared pointer to TensorOperation object.
  773. std::shared_ptr<TensorOperation> Parse() override;
  774. };
  775. /// \brief UniformAugment TensorTransform.
  776. /// \notes Tensor operation to perform randomly selected augmentation.
  777. class UniformAugment final : public TensorTransform {
  778. public:
  779. /// \brief Constructor.
  780. /// \param[in] transforms Vector of raw pointers to TensorTransform operations.
  781. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
  782. explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);
  783. /// \brief Constructor.
  784. /// \param[in] transforms Vector of shared pointers to TensorTransform operations.
  785. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
  786. explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);
  787. /// \brief Constructor.
  788. /// \param[in] transforms Vector of reference wrappers to TensorTransform operations.
  789. /// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
  790. explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);
  791. /// \brief Destructor.
  792. ~UniformAugment() = default;
  793. protected:
  794. /// \brief Function to convert TensorTransform object into a TensorOperation object.
  795. /// \return Shared pointer to TensorOperation object.
  796. std::shared_ptr<TensorOperation> Parse() override;
  797. private:
  798. struct Data;
  799. std::shared_ptr<Data> data_;
  800. };
  801. } // namespace vision
  802. } // namespace dataset
  803. } // namespace mindspore
  804. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_H_