| @@ -1 +1 @@ | |||
| Subproject commit 1350673d51b3f8535bc217a7780e6a0b52ff9a41 | |||
| Subproject commit 45ca7863ac6410c8e2f83168481ddc6b43bcea33 | |||
| @@ -56,13 +56,16 @@ | |||
| #include "dataset/kernels/image/pad_op.h" | |||
| #include "dataset/kernels/image/random_color_adjust_op.h" | |||
| #include "dataset/kernels/image/random_crop_and_resize_op.h" | |||
| #include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" | |||
| #include "dataset/kernels/image/random_crop_decode_resize_op.h" | |||
| #include "dataset/kernels/image/random_crop_op.h" | |||
| #include "dataset/kernels/image/random_crop_with_bbox_op.h" | |||
| #include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" | |||
| #include "dataset/kernels/image/random_horizontal_flip_op.h" | |||
| #include "dataset/kernels/image/random_resize_op.h" | |||
| #include "dataset/kernels/image/random_rotation_op.h" | |||
| #include "dataset/kernels/image/random_vertical_flip_op.h" | |||
| #include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" | |||
| #include "dataset/kernels/image/rescale_op.h" | |||
| #include "dataset/kernels/image/resize_bilinear_op.h" | |||
| #include "dataset/kernels/image/resize_op.h" | |||
| @@ -381,6 +384,12 @@ void bindTensorOps2(py::module *m) { | |||
| *m, "RandomVerticalFlipOp", "Tensor operation to randomly flip an image vertically.") | |||
| .def(py::init<float>(), py::arg("probability") = RandomVerticalFlipOp::kDefProbability); | |||
| (void)py::class_<RandomVerticalFlipWithBBoxOp, TensorOp, std::shared_ptr<RandomVerticalFlipWithBBoxOp>>( | |||
| *m, "RandomVerticalFlipWithBBoxOp", | |||
| "Tensor operation to randomly flip an image vertically" | |||
| " and adjust bounding boxes.") | |||
| .def(py::init<float>(), py::arg("probability") = RandomVerticalFlipWithBBoxOp::kDefProbability); | |||
| (void)py::class_<RandomCropOp, TensorOp, std::shared_ptr<RandomCropOp>>(*m, "RandomCropOp", | |||
| "Gives random crop of specified size " | |||
| "Takes crop size") | |||
| @@ -392,6 +401,20 @@ void bindTensorOps2(py::module *m) { | |||
| py::arg("fillG") = RandomCropOp::kDefFillG, py::arg("fillB") = RandomCropOp::kDefFillB); | |||
| (void)py::class_<HwcToChwOp, TensorOp, std::shared_ptr<HwcToChwOp>>(*m, "ChannelSwapOp").def(py::init<>()); | |||
| (void)py::class_<RandomCropWithBBoxOp, TensorOp, std::shared_ptr<RandomCropWithBBoxOp>>(*m, "RandomCropWithBBoxOp", | |||
| "Gives random crop of given " | |||
| "size + adjusts bboxes " | |||
| "Takes crop size") | |||
| .def(py::init<int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, BorderType, bool, uint8_t, uint8_t, uint8_t>(), | |||
| py::arg("cropHeight"), py::arg("cropWidth"), py::arg("padTop") = RandomCropWithBBoxOp::kDefPadTop, | |||
| py::arg("padBottom") = RandomCropWithBBoxOp::kDefPadBottom, | |||
| py::arg("padLeft") = RandomCropWithBBoxOp::kDefPadLeft, | |||
| py::arg("padRight") = RandomCropWithBBoxOp::kDefPadRight, | |||
| py::arg("borderType") = RandomCropWithBBoxOp::kDefBorderType, | |||
| py::arg("padIfNeeded") = RandomCropWithBBoxOp::kDefPadIfNeeded, | |||
| py::arg("fillR") = RandomCropWithBBoxOp::kDefFillR, py::arg("fillG") = RandomCropWithBBoxOp::kDefFillG, | |||
| py::arg("fillB") = RandomCropWithBBoxOp::kDefFillB); | |||
| (void)py::class_<OneHotOp, TensorOp, std::shared_ptr<OneHotOp>>( | |||
| *m, "OneHotOp", "Tensor operation to apply one hot encoding. Takes number of classes.") | |||
| .def(py::init<int32_t>()); | |||
| @@ -488,6 +511,20 @@ void bindTensorOps3(py::module *m) { | |||
| py::arg("interpolation") = RandomCropAndResizeOp::kDefInterpolation, | |||
| py::arg("maxIter") = RandomCropAndResizeOp::kDefMaxIter); | |||
| (void)py::class_<RandomCropAndResizeWithBBoxOp, TensorOp, std::shared_ptr<RandomCropAndResizeWithBBoxOp>>( | |||
| *m, "RandomCropAndResizeWithBBoxOp", | |||
| "Tensor operation to randomly crop an image (with BBoxes) and resize to a given size." | |||
| "Takes output height and width and" | |||
| "optional parameters for lower and upper bound for aspect ratio (h/w) and scale," | |||
| "interpolation mode, and max attempts to crop") | |||
| .def(py::init<int32_t, int32_t, float, float, float, float, InterpolationMode, int32_t>(), py::arg("targetHeight"), | |||
| py::arg("targetWidth"), py::arg("scaleLb") = RandomCropAndResizeWithBBoxOp::kDefScaleLb, | |||
| py::arg("scaleUb") = RandomCropAndResizeWithBBoxOp::kDefScaleUb, | |||
| py::arg("aspectLb") = RandomCropAndResizeWithBBoxOp::kDefAspectLb, | |||
| py::arg("aspectUb") = RandomCropAndResizeWithBBoxOp::kDefAspectUb, | |||
| py::arg("interpolation") = RandomCropAndResizeWithBBoxOp::kDefInterpolation, | |||
| py::arg("maxIter") = RandomCropAndResizeWithBBoxOp::kDefMaxIter); | |||
| (void)py::class_<RandomColorAdjustOp, TensorOp, std::shared_ptr<RandomColorAdjustOp>>( | |||
| *m, "RandomColorAdjustOp", | |||
| "Tensor operation to adjust an image's color randomly." | |||
| @@ -10,14 +10,17 @@ add_library(kernels-image OBJECT | |||
| pad_op.cc | |||
| random_color_adjust_op.cc | |||
| random_crop_decode_resize_op.cc | |||
| random_crop_and_resize_with_bbox_op.cc | |||
| random_crop_and_resize_op.cc | |||
| random_crop_op.cc | |||
| random_crop_with_bbox_op.cc | |||
| random_horizontal_flip_op.cc | |||
| random_horizontal_flip_bbox_op.cc | |||
| bounding_box_augment_op.cc | |||
| random_resize_op.cc | |||
| random_rotation_op.cc | |||
| random_vertical_flip_op.cc | |||
| random_vertical_flip_with_bbox_op.cc | |||
| rescale_op.cc | |||
| resize_bilinear_op.cc | |||
| resize_op.cc | |||
| @@ -16,6 +16,7 @@ | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include <opencv2/imgproc/types_c.h> | |||
| #include <algorithm> | |||
| #include <vector> | |||
| #include <stdexcept> | |||
| #include <utility> | |||
| #include <opencv2/imgcodecs.hpp> | |||
| @@ -724,5 +725,101 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); | |||
| } | |||
| } | |||
| // -------- BBOX OPERATIONS -------- // | |||
| void UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int *CB_Xmin, int *CB_Ymin, int *CB_Xmax, | |||
| int *CB_Ymax) { | |||
| // PASS LIST, COUNT OF BOUNDING BOXES | |||
| // Also PAss X/Y Min/Max of image cropped region - normally obtained from 'GetCropBox' functions | |||
| uint32_t bb_Xmin_t, bb_Ymin_t, bb_Xmax_t, bb_Ymax_t; | |||
| std::vector<int> correctInd; | |||
| std::vector<uint32_t> copyVals; | |||
| dsize_t bboxDim = (*bboxList)->shape()[1]; | |||
| bool retFlag = false; // true unless overlap found | |||
| for (int i = 0; i < *bboxCount; i++) { | |||
| int bb_Xmin, bb_Xmax, bb_Ymin, bb_Ymax; | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Xmin_t, {i, 0}); | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Ymin_t, {i, 1}); | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Xmax_t, {i, 2}); | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Ymax_t, {i, 3}); | |||
| bb_Xmin = bb_Xmin_t; | |||
| bb_Ymin = bb_Ymin_t; | |||
| bb_Xmax = bb_Xmax_t; | |||
| bb_Ymax = bb_Ymax_t; | |||
| bb_Xmax = bb_Xmin + bb_Xmax; | |||
| bb_Ymax = bb_Ymin + bb_Ymax; | |||
| // check for image / BB overlap | |||
| if (((bb_Xmin > *CB_Xmax) || (bb_Ymin > *CB_Ymax)) || ((bb_Xmax < *CB_Xmin) || (bb_Ymax < *CB_Ymin))) { | |||
| retFlag = true; // no overlap found | |||
| } | |||
| if (retFlag) { // invalid bbox no longer within image region - reset to zero | |||
| continue; | |||
| } | |||
| // Update this bbox and select it to move to the final output tensor | |||
| correctInd.push_back(i); | |||
| // adjust BBox corners by bringing into new CropBox if beyond | |||
| // Also reseting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin | |||
| bb_Xmin = bb_Xmin - (std::min(0, (bb_Xmin - *CB_Xmin)) + *CB_Xmin); | |||
| bb_Xmax = bb_Xmax - (std::max(0, (bb_Xmax - *CB_Xmax)) + *CB_Xmin); | |||
| bb_Ymin = bb_Ymin - (std::min(0, (bb_Ymin - *CB_Ymin)) + *CB_Ymin); | |||
| bb_Ymax = bb_Ymax - (std::max(0, (bb_Ymax - *CB_Ymax)) + *CB_Ymin); | |||
| // reset min values and calculate width/height from Box corners | |||
| (*bboxList)->SetItemAt({i, 0}, (uint32_t)(bb_Xmin)); | |||
| (*bboxList)->SetItemAt({i, 1}, (uint32_t)(bb_Ymin)); | |||
| (*bboxList)->SetItemAt({i, 2}, (uint32_t)(bb_Xmax - bb_Xmin)); | |||
| (*bboxList)->SetItemAt({i, 3}, (uint32_t)(bb_Ymax - bb_Ymin)); | |||
| } | |||
| // create new tensor and copy over bboxes still valid to the image | |||
| // bboxes outside of new cropped region are ignored - empty tensor returned in case of none | |||
| *bboxCount = correctInd.size(); | |||
| uint32_t temp; | |||
| for (auto slice : correctInd) { // for every index in the loop | |||
| for (int ix = 0; ix < bboxDim; ix++) { | |||
| (*bboxList)->GetUnsignedIntAt(&temp, {slice, ix}); | |||
| copyVals.push_back(temp); | |||
| } | |||
| } | |||
| std::shared_ptr<Tensor> retV; | |||
| Tensor::CreateTensor(&retV, copyVals, TensorShape({(dsize_t)bboxCount, bboxDim})); | |||
| (*bboxList) = retV; // reset pointer | |||
| } | |||
| void PadBBoxes(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *pad_top, int32_t *pad_left) { | |||
| uint32_t xMin = 0; | |||
| uint32_t yMin = 0; | |||
| for (int i = 0; i < *bboxCount; i++) { | |||
| (*bboxList)->GetUnsignedIntAt(&xMin, {i, 0}); | |||
| (*bboxList)->GetUnsignedIntAt(&yMin, {i, 1}); | |||
| xMin = xMin + (uint32_t)(*pad_left); // should not be negative | |||
| yMin = yMin + (uint32_t)(*pad_top); | |||
| (*bboxList)->SetItemAt({i, 0}, xMin); | |||
| (*bboxList)->SetItemAt({i, 1}, yMin); | |||
| } | |||
| } | |||
| void UpdateBBoxesForResize(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *target_width_, | |||
| int32_t *target_height_, int *orig_width, int *orig_height) { | |||
| uint32_t bb_Xmin, bb_Ymin, bb_Xwidth, bb_Ywidth; | |||
| // cast to float to preseve fractional | |||
| double W_aspRatio = (*target_width_ * 1.0) / (*orig_width * 1.0); | |||
| double H_aspRatio = (*target_height_ * 1.0) / (*orig_height * 1.0); | |||
| for (int i = 0; i < *bboxCount; i++) { | |||
| // for each bounding box | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Xmin, {i, 0}); | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Ymin, {i, 1}); | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Xwidth, {i, 2}); | |||
| (*bboxList)->GetUnsignedIntAt(&bb_Ywidth, {i, 3}); | |||
| // update positions and widths | |||
| bb_Xmin = bb_Xmin * W_aspRatio; | |||
| bb_Ymin = bb_Ymin * H_aspRatio; | |||
| bb_Xwidth = bb_Xwidth * W_aspRatio; | |||
| bb_Ywidth = bb_Ywidth * H_aspRatio; | |||
| // reset bounding box values | |||
| (*bboxList)->SetItemAt({i, 0}, (uint32_t)bb_Xmin); | |||
| (*bboxList)->SetItemAt({i, 1}, (uint32_t)bb_Ymin); | |||
| (*bboxList)->SetItemAt({i, 2}, (uint32_t)bb_Xwidth); | |||
| (*bboxList)->SetItemAt({i, 3}, (uint32_t)bb_Ywidth); | |||
| } | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -225,7 +225,39 @@ Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outp | |||
| Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top, | |||
| const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, | |||
| uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); | |||
| // -------- BBOX OPERATIONS -------- // | |||
| // Updates and checks bounding boxes for new cropped region of image | |||
| // @param bboxList: A tensor containing bounding box tensors | |||
| // @param bboxCount: total Number of bounding boxes - required within caller function to run update loop | |||
| // @param CB_Xmin: Image's CropBox Xmin coordinate | |||
| // @param CB_Ymin: Image's CropBox Ymin coordinate | |||
| // @param CB_Xmax: Image's CropBox Xmax coordinate - (Xmin + width) | |||
| // @param CB_Ymax: Image's CropBox Ymax coordinate - (Ymin + height) | |||
| void UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int *CB_Xmin, int *CB_Ymin, int *CB_Xmax, | |||
| int *CB_Ymax); | |||
| // Updates bounding boxes with required Top and Left padding | |||
| // Top and Left padding amounts are required to adjust the bboxes' min X,Y values according to the padding 'push' | |||
| // Top/Left since the image's 0,0 coordinate is taken from the top left | |||
| // @param bboxList: A tensor containing bounding box tensors | |||
| // @param bboxCount: total Number of bounding boxes - required within caller function to run update loop | |||
| // @param pad_top: Total amount of padding applied to image top | |||
| // @param pad_left: Total amount of padding applied to image left side | |||
| void PadBBoxes(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *pad_top, int32_t *pad_left); | |||
| // Updates bounding boxes for an Image Resize Operation - Takes in set of valid BBoxes | |||
| // For e.g. those that remain after a crop | |||
| // @param bboxList: A tensor containing bounding box tensors | |||
| // @param bboxCount: total Number of bounding boxes - required within caller function to run update loop | |||
| // @param target_width_: required width of image post resize | |||
| // @param target_height_: required height of image post resize | |||
| // @param orig_width: current width of image pre resize | |||
| // @param orig_height: current height of image pre resize | |||
| void UpdateBBoxesForResize(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *target_width_, | |||
| int32_t *target_height_, int *orig_width, int *orig_height); | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <random> | |||
| #include <utility> | |||
| #include "dataset/util/random.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| Status RandomCropAndResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| BOUNDING_BOX_CHECK(input); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Size() >= 2, "The shape of input is abnormal"); | |||
| (*output).push_back(nullptr); // init memory for return vector | |||
| (*output).push_back(nullptr); | |||
| (*output)[1] = std::move(input[1]); // move boxes over to output | |||
| size_t bboxCount = input[1]->shape()[0]; // number of rows in bbox tensor | |||
| int h_in = input[0]->shape()[0]; | |||
| int w_in = input[0]->shape()[1]; | |||
| int x = 0; | |||
| int y = 0; | |||
| int crop_height = 0; | |||
| int crop_width = 0; | |||
| (void)RandomCropAndResizeOp::GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width); | |||
| int maxX = x + crop_width; // max dims of selected CropBox on image | |||
| int maxY = y + crop_height; | |||
| UpdateBBoxesForCrop(&(*output)[1], &bboxCount, &x, &y, &maxX, &maxY); // IMAGE_UTIL | |||
| RETURN_IF_NOT_OK(CropAndResize(input[0], &(*output)[0], x, y, crop_height, crop_width, target_height_, target_width_, | |||
| interpolation_)); | |||
| UpdateBBoxesForResize(&(*output)[1], &bboxCount, &target_width_, &target_height_, &crop_width, &crop_height); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ | |||
| #define DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ | |||
| #include "dataset/kernels/image/random_crop_and_resize_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp { | |||
| public: | |||
| // Constructor for RandomCropAndResizeWithBBoxOp, with default value and passing to base class constructor | |||
| RandomCropAndResizeWithBBoxOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb, | |||
| float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, | |||
| float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, | |||
| int32_t max_iter = kDefMaxIter) | |||
| : RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, | |||
| max_iter) {} | |||
| ~RandomCropAndResizeWithBBoxOp() override = default; | |||
| void Print(std::ostream &out) const override { | |||
| out << "RandomCropAndResizeWithBBox: " << RandomCropAndResizeOp::target_height_ << " " | |||
| << RandomCropAndResizeOp::target_width_; | |||
| } | |||
| Status Compute(const TensorRow &input, TensorRow *output) override; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ | |||
| @@ -48,44 +48,81 @@ RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_ | |||
| rnd_.seed(GetSeed()); | |||
| } | |||
| Status RandomCropOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| // Apply padding first then crop | |||
| std::shared_ptr<Tensor> pad_image; | |||
| Status RandomCropOp::ImagePadding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *pad_image, | |||
| int32_t *t_pad_top, int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, | |||
| int32_t *padded_image_w, int32_t *padded_image_h, bool *crop_further) { | |||
| *t_pad_top = pad_top_; | |||
| *t_pad_bottom = pad_bottom_; | |||
| *t_pad_left = pad_left_; | |||
| *t_pad_right = pad_right_; | |||
| RETURN_IF_NOT_OK( | |||
| Pad(input, &pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pad_image->shape().Size() >= 2, "Abnormal shape"); | |||
| int32_t padded_image_h = pad_image->shape()[0]; | |||
| int32_t padded_image_w = pad_image->shape()[1]; | |||
| // no need to crop if same size | |||
| if (padded_image_h == crop_height_ && padded_image_w == crop_width_) { | |||
| *output = pad_image; | |||
| Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((*pad_image)->shape().Size() >= 2, "Abnormal shape"); | |||
| *padded_image_h = (*pad_image)->shape()[0]; | |||
| *padded_image_w = (*pad_image)->shape()[1]; | |||
| if (*padded_image_h == crop_height_ && *padded_image_w == crop_width_) { | |||
| *crop_further = false; // no need for further crop | |||
| return Status::OK(); | |||
| } | |||
| if (pad_if_needed_) { | |||
| } else if (pad_if_needed_) { | |||
| // check the dimensions of the image for padding, if we do need padding, then we change the pad values | |||
| if (padded_image_h < crop_height_) { | |||
| RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, crop_height_ - padded_image_h, crop_height_ - padded_image_h, 0, 0, | |||
| if (*padded_image_h < crop_height_) { | |||
| RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, crop_height_ - *padded_image_h, crop_height_ - *padded_image_h, 0, 0, | |||
| border_type_, fill_r_, fill_g_, fill_b_)); | |||
| // update pad total above/below | |||
| t_pad_top += (crop_height_ - *padded_image_h); | |||
| t_pad_bottom += (crop_height_ - *padded_image_h); | |||
| } | |||
| if (padded_image_w < crop_width_) { | |||
| RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, 0, 0, crop_width_ - padded_image_w, crop_width_ - padded_image_w, | |||
| if (*padded_image_w < crop_width_) { | |||
| RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, 0, 0, crop_width_ - *padded_image_w, crop_width_ - *padded_image_w, | |||
| border_type_, fill_r_, fill_g_, fill_b_)); | |||
| // update pad total left/right | |||
| t_pad_left += (crop_width_ - *padded_image_w); | |||
| t_pad_right += (crop_width_ - *padded_image_w); | |||
| } | |||
| padded_image_h = pad_image->shape()[0]; | |||
| padded_image_w = pad_image->shape()[1]; | |||
| *padded_image_h = (*pad_image)->shape()[0]; | |||
| *padded_image_w = (*pad_image)->shape()[1]; | |||
| } | |||
| if (padded_image_h < crop_height_ || padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { | |||
| if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { | |||
| return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__, | |||
| "Crop size is greater than the image dimensions or is zero."); | |||
| } | |||
| // random top corner | |||
| int x = std::uniform_int_distribution<int>(0, padded_image_w - crop_width_)(rnd_); | |||
| int y = std::uniform_int_distribution<int>(0, padded_image_h - crop_height_)(rnd_); | |||
| return Status::OK(); | |||
| } | |||
| void RandomCropOp::GenRandomXY(int *x, int *y, int32_t *padded_image_w, int32_t *padded_image_h) { | |||
| // GenCropPoints for cropping | |||
| *x = std::uniform_int_distribution<int>(0, *padded_image_w - crop_width_)(rnd_); | |||
| *y = std::uniform_int_distribution<int>(0, *padded_image_h - crop_height_)(rnd_); | |||
| } | |||
| Status RandomCropOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| // Apply padding first then crop | |||
| std::shared_ptr<Tensor> pad_image; | |||
| int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right; | |||
| int32_t padded_image_w; | |||
| int32_t padded_image_h; | |||
| bool crop_further = true; // whether image needs further cropping based on new size & requirements | |||
| RETURN_IF_NOT_OK( // error code sent back directly | |||
| ImagePadding(input, &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, &padded_image_w, | |||
| &padded_image_h, &crop_further)); | |||
| if (!crop_further) { | |||
| *output = pad_image; | |||
| return Status::OK(); | |||
| } | |||
| int x, y; | |||
| GenRandomXY(&x, &y, &padded_image_w, &padded_image_h); | |||
| return Crop(pad_image, output, x, y, crop_width_, crop_height_); | |||
| } | |||
| Status RandomCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) { | |||
| RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); | |||
| outputs.clear(); | |||
| @@ -50,11 +50,20 @@ class RandomCropOp : public TensorOp { | |||
| void Print(std::ostream &out) const override { out << "RandomCropOp: " << crop_height_ << " " << crop_width_; } | |||
| Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; | |||
| Status ImagePadding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *pad_image, int32_t *t_pad_top, | |||
| int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, int32_t *padded_image_w, | |||
| int32_t *padded_image_h, bool *crop_further); | |||
| void GenRandomXY(int *x, int *y, int32_t *padded_image_w, int32_t *padded_image_h); | |||
| Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override; | |||
| private: | |||
| protected: | |||
| int32_t crop_height_ = 0; | |||
| int32_t crop_width_ = 0; | |||
| private: | |||
| int32_t pad_top_ = 0; | |||
| int32_t pad_bottom_ = 0; | |||
| int32_t pad_left_ = 0; | |||
| @@ -0,0 +1,67 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <random> | |||
| #include <algorithm> | |||
| #include <utility> | |||
| #include "dataset/kernels/image/random_crop_with_bbox_op.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/util/random.h" | |||
| #include "dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| Status RandomCropWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| BOUNDING_BOX_CHECK(input); | |||
| std::shared_ptr<Tensor> pad_image; | |||
| int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right; | |||
| size_t boxCount = input[1]->shape()[0]; // number of rows | |||
| int32_t padded_image_h; | |||
| int32_t padded_image_w; | |||
| (*output).push_back(nullptr); | |||
| (*output).push_back(nullptr); | |||
| (*output)[1] = std::move(input[1]); // since some boxes may be removed | |||
| bool crop_further = true; // Whether further cropping will be required or not, true unless required size matches | |||
| RETURN_IF_NOT_OK( // Error passed back to caller | |||
| RandomCropOp::ImagePadding(input[0], &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, | |||
| &padded_image_w, &padded_image_h, &crop_further)); | |||
| // update bounding boxes with new values based on relevant image padding | |||
| if (t_pad_left || t_pad_bottom) { | |||
| PadBBoxes(&(*output)[1], &boxCount, &t_pad_left, &t_pad_top); | |||
| } | |||
| if (!crop_further) { | |||
| // no further cropping required | |||
| (*output)[0] = pad_image; | |||
| (*output)[1] = std::move(input[1]); | |||
| return Status::OK(); | |||
| } | |||
| int x, y; | |||
| RandomCropOp::GenRandomXY(&x, &y, &padded_image_w, &padded_image_h); | |||
| int maxX = x + RandomCropOp::crop_width_; // max dims of selected CropBox on image | |||
| int maxY = y + RandomCropOp::crop_height_; | |||
| UpdateBBoxesForCrop(&(*output)[1], &boxCount, &x, &y, &maxX, &maxY); | |||
| return Crop(pad_image, &(*output)[0], x, y, RandomCropOp::crop_width_, RandomCropOp::crop_height_); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_ | |||
| #define DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "dataset/kernels/image/random_crop_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class RandomCropWithBBoxOp : public RandomCropOp { | |||
| public: | |||
| // Constructor for RandomCropWithBBoxOp, with default value and passing to base class constructor | |||
| RandomCropWithBBoxOp(int32_t crop_height, int32_t crop_width, int32_t pad_top = kDefPadTop, | |||
| int32_t pad_bottom = kDefPadBottom, int32_t pad_left = kDefPadLeft, | |||
| int32_t pad_right = kDefPadRight, BorderType border_types = kDefBorderType, | |||
| bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, | |||
| uint8_t fill_b = kDefFillB) | |||
| : RandomCropOp(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, border_types, pad_if_needed, | |||
| fill_r, fill_g, fill_b) {} | |||
| ~RandomCropWithBBoxOp() override = default; | |||
| void Print(std::ostream &out) const override { | |||
| out << "RandomCropWithBBoxOp: " << RandomCropOp::crop_height_ << " " << RandomCropOp::crop_width_; | |||
| } | |||
| Status Compute(const TensorRow &input, TensorRow *output) override; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_ | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <utility> | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
// Default flip probability; used as the default argument of the constructor and of the python binding.
| const float RandomVerticalFlipWithBBoxOp::kDefProbability = 0.5; | |||
| /// Flips the image vertically with the configured probability and mirrors the y origin of every | |||
| /// bounding box so the boxes keep covering the same pixels. | |||
| /// @param input Row with the image at index 0 and the bounding-box tensor at index 1. | |||
| /// @param output Receives the (possibly flipped) image at index 0 and the boxes at index 1. | |||
| /// @return The status of VerticalFlip when a flip is performed, Status::OK() otherwise. | |||
| /// @note The bounding-box tensor is modified through the handle stored in `input`, so the | |||
| ///       caller's tensor is updated in place. | |||
| Status RandomVerticalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { | |||
|   IO_CHECK_VECTOR(input, output); | |||
|   BOUNDING_BOX_CHECK(input); | |||
|   if (!distribution_(rnd_)) { | |||
|     // No flip this time: forward the row untouched. | |||
|     *output = input; | |||
|     return Status::OK(); | |||
|   } | |||
|   const dsize_t imHeight = input[0]->shape()[0]; | |||
|   const dsize_t boxCount = input[1]->shape()[0];  // number of rows in the bbox tensor | |||
|   // Box values are read as uint32_t; type chosen based on the VOC test dataset. | |||
|   // NOTE(review): the results of GetUnsignedIntAt/SetItemAt are not inspected here, same as | |||
|   // before - confirm whether failures should be propagated to the caller. | |||
|   for (dsize_t i = 0; i < boxCount; ++i) { | |||
|     uint32_t boxCorner_y = 0; | |||
|     uint32_t boxHeight = 0; | |||
|     input[1]->GetUnsignedIntAt(&boxCorner_y, {i, 1});  // get min y of bbox | |||
|     input[1]->GetUnsignedIntAt(&boxHeight, {i, 3});    // get height of bbox | |||
|     // Widen before adding so the sum cannot overflow 32-bit unsigned arithmetic. | |||
|     const dsize_t boxBottom = static_cast<dsize_t>(boxCorner_y) + static_cast<dsize_t>(boxHeight); | |||
|     // A box covering rows [y, y + h) maps to [H - (y + h), H - y) after the flip. The previous | |||
|     // "(H - 1) - (y + h)" was one row too small and wrapped around for boxes touching the | |||
|     // bottom edge of the image. | |||
|     dsize_t flippedCorner_y = imHeight - boxBottom; | |||
|     if (flippedCorner_y < 0) { | |||
|       // Guard against unsigned wrap-around should a box extend past the image. | |||
|       flippedCorner_y = 0; | |||
|     } | |||
|     input[1]->SetItemAt({i, 1}, static_cast<uint32_t>(flippedCorner_y)); | |||
|   } | |||
|   output->push_back(nullptr); | |||
|   output->push_back(nullptr); | |||
|   // `input` is const, so std::move could only ever copy the handle; copy it explicitly. | |||
|   (*output)[1] = input[1]; | |||
|   return VerticalFlip(input[0], &(*output)[0]); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_ | |||
| #define DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_ | |||
| #include <memory> | |||
| #include <random> | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/kernels/tensor_op.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/util/random.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| /// Tensor operation to randomly flip an image vertically and adjust bounding boxes. | |||
| /// Holds a seeded random engine and a Bernoulli distribution; Compute() is defined out of line. | |||
| class RandomVerticalFlipWithBBoxOp : public TensorOp { | |||
|  public: | |||
|   /// Default flip probability; also used by python_bindings.cc. | |||
|   static const float kDefProbability; | |||
|   /// Builds the op and seeds its random engine from GetSeed(). | |||
|   /// @param probability Probability of the image being flipped (kDefProbability by default). | |||
|   explicit RandomVerticalFlipWithBBoxOp(float probability = kDefProbability) | |||
|       : distribution_(probability) { | |||
|     rnd_.seed(GetSeed()); | |||
|   } | |||
|   ~RandomVerticalFlipWithBBoxOp() override = default; | |||
|   /// Writes the op name to the given stream. | |||
|   void Print(std::ostream &out) const override { | |||
|     out << "RandomVerticalFlipWithBBoxOp"; | |||
|   } | |||
|   /// Processes one tensor row; see the definition in the implementation file. | |||
|   Status Compute(const TensorRow &input, TensorRow *output) override; | |||
|  private: | |||
|   std::mt19937 rnd_;                          // random engine driving the flip decision | |||
|   std::bernoulli_distribution distribution_;  // yields true with the configured probability | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_ | |||
| @@ -149,6 +149,54 @@ class RandomCrop(cde.RandomCropOp): | |||
| super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) | |||
| class RandomCropWithBBox(cde.RandomCropWithBBoxOp): | |||
| """ | |||
| Crop the input image at a random location, and adjust bounding boxes | |||
| Args: | |||
| size (int or sequence): The output size of the cropped image. | |||
| If size is an int, a square crop of size (size, size) is returned. | |||
| If size is a sequence of length 2, it should be (height, width). | |||
| padding (int or sequence, optional): The number of pixels to pad the image (default=None). | |||
| If padding is not None, pad image firstly with padding values. | |||
| If a single number is provided, it pads all borders with this value. | |||
| If a tuple or list of 2 values are provided, it pads the (left and top) | |||
| with the first value and (right and bottom) with the second value. | |||
| If 4 values are provided as a list or tuple,it pads the left, top, right and bottom respectively. | |||
| pad_if_needed (bool, optional): Pad the image if either side is smaller than | |||
| the given output size (default=False). | |||
| fill_value (int or tuple, optional): The pixel intensity of the borders if | |||
| the padding_mode is Border.CONSTANT (default=0). If it is a 3-tuple, it is used to | |||
| fill R, G, B channels respectively. | |||
| padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of | |||
| [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. | |||
| - Border.CONSTANT, means it fills the border with constant values. | |||
| - Border.EDGE, means it pads with the last value on the edge. | |||
| - Border.REFLECT, means it reflects the values on the edge omitting the last | |||
| value of edge. | |||
| - Border.SYMMETRIC, means it reflects the values on the edge repeating the last | |||
| value of edge. | |||
| """ | |||
| @check_random_crop | |||
| def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT): | |||
| self.size = size | |||
| self.padding = padding | |||
| self.pad_if_needed = pad_if_needed | |||
| self.fill_value = fill_value | |||
| self.padding_mode = padding_mode.value | |||
| if padding is None: | |||
| padding = (0, 0, 0, 0) | |||
| if isinstance(fill_value, int): # temporary fix | |||
| fill_value = tuple([fill_value] * 3) | |||
| border_type = DE_C_BORDER_TYPE[padding_mode] | |||
| super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value) | |||
| class RandomHorizontalFlip(cde.RandomHorizontalFlipOp): | |||
| """ | |||
| Flip the input image horizontally, randomly with a given probability. | |||
| @@ -192,6 +240,20 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp): | |||
| super().__init__(prob) | |||
| class RandomVerticalFlipWithBBox(cde.RandomVerticalFlipWithBBoxOp): | |||
| """ | |||
| Flip the input image vertically and adjust bounding boxes, randomly with a given probability. | |||
| Args: | |||
| prob (float): Probability of the image being flipped (default=0.5). | |||
| """ | |||
| @check_prob | |||
| def __init__(self, prob=0.5): | |||
| self.prob = prob | |||
| super().__init__(prob) | |||
| class BoundingBoxAug(cde.BoundingBoxAugOp): | |||
| """ | |||
| Flip the input image vertically, randomly with a given probability. | |||
| @@ -237,6 +299,42 @@ class Resize(cde.ResizeOp): | |||
| super().__init__(*size, interpoltn) | |||
| class RandomResizedCropWithBBox(cde.RandomCropAndResizeWithBBoxOp): | |||
| """ | |||
| Crop the input image to a random size and aspect ratio and adjust the Bounding Boxes accordingly | |||
| Args: | |||
| size (int or sequence): The size of the output image. | |||
| If size is an int, a square crop of size (size, size) is returned. | |||
| If size is a sequence of length 2, it should be (height, width). | |||
| scale (tuple, optional): Range (min, max) of respective size of the original | |||
| size to be cropped (default=(0.08, 1.0)). | |||
| ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped | |||
| (default=(3. / 4., 4. / 3.)). | |||
| interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). | |||
| It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. | |||
| - Inter.BILINEAR, means interpolation method is bilinear interpolation. | |||
| - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. | |||
| - Inter.BICUBIC, means interpolation method is bicubic interpolation. | |||
| max_attempts (int, optional): The maximum number of attempts to propose a valid | |||
| crop_area (default=10). If exceeded, fall back to use center_crop instead. | |||
| """ | |||
| @check_random_resize_crop | |||
| def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), | |||
| interpolation=Inter.BILINEAR, max_attempts=10): | |||
| self.size = size | |||
| self.scale = scale | |||
| self.ratio = ratio | |||
| self.interpolation = interpolation | |||
| self.max_attempts = max_attempts | |||
| interpoltn = DE_C_INTER_MODE[interpolation] | |||
| super().__init__(*size, *scale, *ratio, interpoltn, max_attempts) | |||
| class RandomResizedCrop(cde.RandomCropAndResizeOp): | |||
| """ | |||
| Crop the input image to a random size and aspect ratio. | |||