You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

image_utils.h 14 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_
  17. #define DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_
  18. #include <setjmp.h>
  19. #include <memory>
  20. #include <random>
  21. #include <string>
  22. #include <vector>
  23. #if defined(_WIN32) || defined(_WIN64)
  24. #undef HAVE_STDDEF_H
  25. #undef HAVE_STDLIB_H
  26. #endif
  27. #include "./jpeglib.h"
  28. #include "./jerror.h"
  29. #include <opencv2/imgproc/imgproc.hpp>
  30. #include "dataset/core/tensor.h"
  31. #include "dataset/kernels/tensor_op.h"
  32. #include "dataset/util/status.h"
  33. namespace mindspore {
  34. namespace dataset {
  35. enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };  // DE interpolation modes; GetCVInterpolationMode returns the OpenCV equivalent
  36. enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };  // Border types used for padding; GetCVBorderType returns the OpenCV equivalent
  37. void JpegErrorExitCustom(j_common_ptr cinfo);  // NOTE(review): presumably the libjpeg error_exit handler paired with JpegErrorManagerCustom - confirm
  38. struct JpegErrorManagerCustom {
  39. // Standard libjpeg error manager (the "public" fields)
  40. struct jpeg_error_mgr pub;
  41. // Jump buffer for returning to the caller (NOTE(review): presumably the longjmp target of JpegErrorExitCustom - confirm)
  42. jmp_buf setjmp_buffer;
  43. };
  44. // Returns the OpenCV equivalent of the given interpolation mode, as an int.
  45. // @param mode: interpolation mode in DE format (InterpolationMode)
  46. int GetCVInterpolationMode(InterpolationMode mode);
  47. // Returns the OpenCV equivalent of the border type used for padding.
  48. // @param type: border type as a BorderType value
  49. // @return the equivalent OpenCV border type, as an int
  50. int GetCVBorderType(BorderType type);
  51. // Returns the flipped image.
  52. // @param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  53. // @param flip_code: 1 for horizontal (around y-axis), 0 for vertical (around x-axis), -1 for both
  54. // The flipping happens in place. NOTE(review): input and output are separate parameters - confirm "in place".
  55. Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code);
  56. // Returns the horizontally flipped image.
  57. // @param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  58. // The flipping happens in place. NOTE(review): input and output are separate parameters - confirm "in place".
  59. Status HorizontalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
  60. // Returns the vertically flipped image.
  61. // @param input/output: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  62. // The flipping happens in place. NOTE(review): input and output are separate parameters - confirm "in place".
  63. Status VerticalFlip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
  64. // Returns the resized image.
  65. // @param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  66. // @param output_height: height of output
  67. // @param output_width: width of output
  68. // @param fx: horizontal scale (defaults to 0.0)
  69. // @param fy: vertical scale (defaults to 0.0)
  70. // @param mode: the interpolation mode (defaults to InterpolationMode::kLinear)
  71. // @param output: Resized image of shape <output_height,output_width,C> or <output_height,output_width>
  72. // and same type as input
  73. Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height,
  74. int32_t output_width, double fx = 0.0, double fy = 0.0,
  75. InterpolationMode mode = InterpolationMode::kLinear);
  76. // Returns the decoded image.
  77. // Supported image formats:
  78. // BMP JPEG JPG PNG TIFF
  79. // These are supported by OpenCV; if the user needs more image analysis capabilities, please build OpenCV accordingly.
  80. // @param input: CVTensor containing the undecoded image as 1D bytes
  81. // @param output: Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB
  82. Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
  83. Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);  // NOTE(review): undocumented; presumably the OpenCV-based decode path - confirm
  84. bool HasJpegMagic(const std::shared_ptr<Tensor> &input);  // NOTE(review): undocumented; presumably checks input for the JPEG magic bytes - confirm
  85. void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size);  // NOTE(review): undocumented; presumably sets data/data_size as the decompressor's input - confirm
  86. Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x = 0, int y = 0,
  87. int w = 0, int h = 0);  // NOTE(review): undocumented; presumably decodes only the (x, y, w, h) region; meaning of the 0 defaults not visible here - confirm
  88. // Returns the rescaled image.
  89. // @param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  90. // @param rescale: rescale parameter
  91. // @param shift: shift parameter (NOTE(review): presumably output = input * rescale + shift - confirm)
  92. // @param output: Rescaled image Tensor of the same shape as the input and type DE_FLOAT32
  93. Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift);
  94. // Returns the cropped region of interest (ROI) of an image.
  95. // @param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  96. // @param x: starting horizontal position of the ROI
  97. // @param y: starting vertical position of the ROI
  98. // @param w: width of the ROI
  99. // @param h: height of the ROI
  100. // @param output: Cropped image Tensor of shape <h,w,C> or <h,w> and the same type as the input.
  101. Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h);
  102. // Moves the channel axis of the image to the front, i.e. converts HWC to CHW.
  103. // @param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  104. // @param output: Tensor of shape <C,H,W> or <H,W> and the same type as the input.
  105. Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
  106. // Swaps the red and blue channels (RGB <-> BGR).
  107. // @param input: Tensor of shape <H,W,3> and any OpenCV-compatible type, see CVTensor.
  108. // @param output: Swapped image of the same shape and type
  109. Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
  110. // Crops a region of the image and resizes it.
  111. // @param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  112. // @param x: horizontal start point
  113. // @param y: vertical start point
  114. // @param crop_height: height of the cropped ROI
  115. // @param crop_width: width of the cropped ROI
  116. // @param target_height: height of the final resized image
  117. // @param target_width: width of the final resized image
  118. // @param mode: the interpolation used in the resize operation
  119. // @param output: Tensor of shape <target_height,target_width,C> or <target_height,target_width>
  120. // and same type as input
  121. Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y,
  122. int crop_height, int crop_width, int target_height, int target_width, InterpolationMode mode);
  123. // Returns the rotated image in output, with the same type as the input.
  124. // @param input: Tensor of shape <H,W,C> or <H,W> and any OpenCV-compatible type, see CVTensor.
  125. // @param fx, fy: rotation center x and y coordinates
  126. // @param degree: degree to rotate
  127. // @param interpolation: the interpolation mode (defaults to InterpolationMode::kNearestNeighbour)
  128. // @param expand: if reshape is necessary (defaults to false)
  129. // @param fill_r, fill_g, fill_b: fill values, each defaulting to 0 (NOTE(review): presumably red/green/blue - confirm)
  130. Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float fx, float fy, float degree,
  131. InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, bool expand = false,
  132. uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
  133. // Returns the normalized image.
  134. // @param input: Tensor of shape <H,W,C> in RGB order and any OpenCV-compatible type, see CVTensor.
  135. // @param mean: Tensor of shape <3> and type DE_FLOAT32 holding the mean of each channel in RGB order
  136. // @param std: Tensor of shape <3> and type DE_FLOAT32 holding the std of each channel in RGB order
  137. // @param output: Normalized image Tensor of the same shape as the input and type DE_FLOAT32
  138. Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
  139. const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std);
  140. // Returns the image with adjusted brightness.
  141. // @param input: Tensor of shape <H,W,3> in RGB order and any OpenCV-compatible type, see CVTensor.
  142. // @param alpha: Alpha value to adjust brightness by. Should be a positive number.
  143. // If the user inputs one value in Python, the range is [1 - value, 1 + value].
  144. // The output is the original image multiplied by alpha. 0 gives a black image, 1 gives the
  145. // original image, while 2 increases the brightness by a factor of 2.
  146. // @param output: Adjusted image of the same shape and type.
  147. Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha);
  148. // Returns the image with adjusted contrast.
  149. // @param input: Tensor of shape <H,W,3> in RGB order and any OpenCV-compatible type, see CVTensor.
  150. // @param alpha: Alpha value to adjust contrast by. Should be a positive number.
  151. // If the user inputs one value in Python, the range is [1 - value, 1 + value].
  152. // 0 gives a solid gray image, 1 gives the original image, while 2 increases
  153. // the contrast by a factor of 2.
  154. // @param output: Adjusted image of the same shape and type.
  155. Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha);
  156. // Returns the image with adjusted saturation.
  157. // @param input: Tensor of shape <H,W,3> in RGB order and any OpenCV-compatible type, see CVTensor.
  158. // @param alpha: Alpha value to adjust saturation by. Should be a positive number.
  159. // If the user inputs one value in Python, the range is [1 - value, 1 + value].
  160. // 0 gives a black and white image, 1 gives the original image, while
  161. // 2 enhances the saturation by a factor of 2.
  162. // @param output: Adjusted image of the same shape and type.
  163. Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &alpha);
  164. // Returns the image with adjusted hue.
  165. // @param input: Tensor of shape <H,W,3> in RGB order and any OpenCV-compatible type, see CVTensor.
  166. // @param hue: Hue value to adjust by, should be within range [-0.5, 0.5]. 0.5 and -0.5 will reverse the hue channel
  167. // completely.
  168. // If the user inputs one value in Python, the range is [-value, value].
  169. // @param output: Adjusted image of the same shape and type.
  170. Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &hue);
  171. // Masks out random sections of the image, each with the given dimensions.
  172. // @param input: input Tensor
  173. // @param output: cutOut Tensor
  174. // @param box_height: height of the cropped box
  175. // @param box_width: width of the cropped box
  176. // @param num_patches: number of boxes to cut out from the image
  177. // @param bounded: boolean flag to toggle between random erasing and cutout
  178. // @param random_color: whether or not a random fill value should be used
  179. // @param rnd: pointer to a std::mt19937 random number generator
  180. // @param fill_r, fill_g: red and green fill values for erase (default 0)
  181. // @param fill_b: blue fill value for erase (default 0).
  182. Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t box_height,
  183. int32_t box_width, int32_t num_patches, bool bounded, bool random_color, std::mt19937 *rnd,
  184. uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
  185. // Pads the input image and stores the padded image in the output.
  186. // @param input: input Tensor
  187. // @param output: padded Tensor
  188. // @param pad_top: amount of padding applied at the top
  189. // @param pad_bottom: amount of padding applied at the bottom
  190. // @param pad_left: amount of padding applied on the left
  191. // @param pad_right: amount of padding applied on the right
  192. // @param border_types: the interpolation to be done in the border (a BorderType value)
  193. // @param fill_r: red fill value for pad (default 0)
  194. // @param fill_g: green fill value for pad (default 0)
  195. // @param fill_b: blue fill value for pad (default 0).
  196. Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
  197. const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
  198. uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
  199. // -------- BBOX OPERATIONS -------- //
  200. // Updates and checks bounding boxes for the new cropped region of the image
  201. // @param bboxList: A tensor containing bounding box tensors
  202. // @param bboxCount: total number of bounding boxes - required within caller function to run update loop
  203. // @param CB_Xmin: Image's CropBox Xmin coordinate
  204. // @param CB_Ymin: Image's CropBox Ymin coordinate
  205. // @param CB_Xmax: Image's CropBox Xmax coordinate - (Xmin + width)
  206. // @param CB_Ymax: Image's CropBox Ymax coordinate - (Ymin + height)
  207. Status UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int CB_Xmin, int CB_Ymin, int CB_Xmax,
  208. int CB_Ymax);
  209. // Updates bounding boxes with the required top and left padding
  210. // Top and left padding amounts are required to adjust the bboxes' min X,Y values according to the padding 'push'
  211. // Top/left since the image's 0,0 coordinate is taken from the top left
  212. // @param bboxList: A tensor containing bounding box tensors
  213. // @param bboxCount: total number of bounding boxes - required within caller function to run update loop
  214. // @param pad_top: Total amount of padding applied to image top
  215. // @param pad_left: Total amount of padding applied to image left side
  216. Status PadBBoxes(const std::shared_ptr<Tensor> *bboxList, const size_t &bboxCount, int32_t pad_top, int32_t pad_left);
  217. // Updates bounding boxes for an image resize operation - takes in a set of valid BBoxes,
  218. // e.g. those that remain after a crop
  219. // @param bboxList: A tensor containing bounding box tensors
  220. // @param bboxCount: total number of bounding boxes - required within caller function to run update loop
  221. // NOTE(review): bboxList is a reference here but a pointer in UpdateBBoxesForCrop/PadBBoxes - confirm intended
  222. // @param target_width_: required width of image post resize
  223. // @param target_height_: required height of image post resize
  224. // @param orig_width: current width of image pre resize
  225. // @param orig_height: current height of image pre resize
  226. Status UpdateBBoxesForResize(const std::shared_ptr<Tensor> &bboxList, const size_t &bboxCount, int32_t target_width_,
  227. int32_t target_height_, int orig_width, int orig_height);
  228. } // namespace dataset
  229. } // namespace mindspore
  230. #endif // DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_