| @@ -366,12 +366,14 @@ install( | |||
| ## Public header files for minddata | |||
| # Each public minddata header is listed exactly once under a single FILES keyword. | |||
| install( | |||
| FILES ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/config.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/text.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h | |||
| ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision_ascend.h | |||
| DESTINATION ${INSTALL_BASE_DIR}/include/dataset | |||
| COMPONENT mindspore | |||
| ) | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/include/audio.h" | |||
| #include "minddata/dataset/include/dataset/audio.h" | |||
| #include "minddata/dataset/audio/ir/kernels/audio_ir.h" | |||
| @@ -223,82 +223,13 @@ Execute::~Execute() { | |||
| } | |||
| // Applies the configured transforms to a single input tensor and stores the single result in *output. | |||
| // \param[in] input Tensor to transform; rejected when it carries no data. | |||
| // \param[out] output Destination of the transformed tensor; must not be null. | |||
| // \return Status::OK() on success, otherwise the first error raised by a check or by a transform. | |||
| Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output) { | |||
| // Validate input tensor | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.DataSize() > 0, "Input Tensor has no data"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(validate_device_(), "Device Type should be 'Ascend310' or 'CPU'"); | |||
| // output is dereferenced below, so reject a null pointer up front instead of crashing | |||
| CHECK_FAIL_RETURN_UNEXPECTED(output != nullptr, "Output Tensor can not be nullptr"); | |||
| // Parse TensorTransform transforms_ into TensorOperation ops_ | |||
| if (info_->init_with_shared_ptr_) { | |||
| RETURN_IF_NOT_OK(ParseTransforms_()); | |||
| info_->init_with_shared_ptr_ = false; | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided"); | |||
| // Validate and build runtime ops | |||
| std::vector<std::shared_ptr<TensorOp>> transforms; // record the transformations | |||
| std::map<MapTargetDevice, std::string> env_list = { | |||
| {MapTargetDevice::kCpu, "kCpu"}, {MapTargetDevice::kGpu, "kGpu"}, {MapTargetDevice::kAscend310, "kAscend310"}}; | |||
| // Unsigned index so the comparison with ops_.size() does not mix signed and unsigned values | |||
| for (size_t i = 0; i < ops_.size(); i++) { | |||
| if (ops_[i] == nullptr) { | |||
| MS_LOG(ERROR) << "Input TensorOperation[" | |||
| << std::to_string(i) + "] is unsupported on your input device:" << env_list.at(device_type_); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ops_[i] != nullptr, "Input TensorOperation[" + std::to_string(i) + "] is null"); | |||
| RETURN_IF_NOT_OK(ops_[i]->ValidateParams()); | |||
| transforms.emplace_back(ops_[i]->Build()); | |||
| } | |||
| if (device_type_ == MapTargetDevice::kCpu) { | |||
| // Convert mindspore::Tensor to dataset::Tensor | |||
| std::shared_ptr<dataset::Tensor> de_tensor; | |||
| Status rc = dataset::Tensor::CreateFromMemory(dataset::TensorShape(input.Shape()), | |||
| MSTypeToDEType(static_cast<TypeId>(input.DataType())), | |||
| reinterpret_cast<const uchar *>(input.Data().get()), input.DataSize(), | |||
| &de_tensor); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << rc; | |||
| return rc; | |||
| } | |||
| // Apply transforms on tensor | |||
| for (auto &t : transforms) { | |||
| std::shared_ptr<dataset::Tensor> de_output; | |||
| Status rc_ = t->Compute(de_tensor, &de_output); | |||
| if (rc_.IsError()) { | |||
| MS_LOG(ERROR) << rc_; | |||
| return rc_; | |||
| } | |||
| // For next transform | |||
| de_tensor = std::move(de_output); | |||
| } | |||
| // Convert dataset::Tensor to mindspore::Tensor | |||
| CHECK_FAIL_RETURN_UNEXPECTED(de_tensor->HasData(), "Apply transform failed, output tensor has no data"); | |||
| *output = mindspore::MSTensor(std::make_shared<DETensor>(de_tensor)); | |||
| } else { // Ascend310 case, where we must set Ascend resource on each operators | |||
| #ifdef ENABLE_ACL | |||
| CHECK_FAIL_RETURN_UNEXPECTED(device_resource_, "Device resource is nullptr which is illegal under case Ascend310"); | |||
| // Sink data from host into device | |||
| std::shared_ptr<mindspore::dataset::DeviceTensor> device_input; | |||
| RETURN_IF_NOT_OK(device_resource_->Sink(input, &device_input)); | |||
| for (auto &t : transforms) { | |||
| // Initialize AscendResource for each operators | |||
| std::shared_ptr<DeviceTensor> device_output; | |||
| RETURN_IF_NOT_OK(t->SetAscendResource(device_resource_)); | |||
| RETURN_IF_NOT_OK(t->Compute(device_input, &device_output)); | |||
| // For next transform | |||
| device_input = std::move(device_output); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(device_input->HasDeviceData(), "Apply transform failed, output tensor has no data"); | |||
| *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true)); | |||
| #endif | |||
| } | |||
| // Run the same input through the vector overload and require exactly one result. | |||
| // NOTE(review): this overwrites the *output assigned above, and when output aliases input the vector | |||
| // overload receives the already-transformed tensor - looks like one of the two paths is leftover; confirm. | |||
| std::vector<MSTensor> input_tensors = {input}; | |||
| std::vector<MSTensor> output_tensors; | |||
| RETURN_IF_NOT_OK(operator()(input_tensors, &output_tensors)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(output_tensors.size() == 1, "Result of Execute has more than 1 outputs (got " + | |||
| std::to_string(output_tensors.size()) + | |||
| "), use Execute(vector, vector) instead."); | |||
| *output = output_tensors[0]; | |||
| return Status::OK(); | |||
| } | |||
| @@ -1,171 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/api/python/pybind_register.h" | |||
| #include "pybind11/pybind11.h" | |||
| #include "pybind11/stl_bind.h" | |||
| #include "minddata/dataset/engine/datasetops/dataset_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/cifar_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/clue_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/csv_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/coco_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/io_block.h" | |||
| #include "minddata/dataset/engine/datasetops/source/manifest_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/text_file_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/voc_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Binds CifarOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(CifarOp, 1, ([](const py::module *m) { | |||
| using CifarOpClass = py::class_<CifarOp, DatasetOp, std::shared_ptr<CifarOp>>; | |||
| // Forwards to CifarOp::CountTotalRows; a failing Status is handed to THROW_IF_ERROR. | |||
| auto get_num_rows = [](const std::string &dir, const std::string &usage, bool isCifar10) { | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(CifarOp::CountTotalRows(dir, usage, isCifar10, &num_rows)); | |||
| return num_rows; | |||
| }; | |||
| (void)CifarOpClass(*m, "CifarOp").def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds ClueOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(ClueOp, 1, ([](const py::module *m) { | |||
| using ClueOpClass = py::class_<ClueOp, DatasetOp, std::shared_ptr<ClueOp>>; | |||
| // Converts the Python list to file names (None becomes "") and forwards to ClueOp::CountAllFileRows. | |||
| auto get_num_rows = [](const py::list &files) { | |||
| std::vector<std::string> filenames; | |||
| for (auto file : files) { | |||
| if (file.is_none()) { | |||
| (void)filenames.emplace_back(""); | |||
| } else { | |||
| filenames.push_back(py::str(file)); | |||
| } | |||
| } | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(ClueOp::CountAllFileRows(filenames, &num_rows)); | |||
| return num_rows; | |||
| }; | |||
| (void)ClueOpClass(*m, "ClueOp").def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds CsvOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(CsvOp, 1, ([](const py::module *m) { | |||
| using CsvOpClass = py::class_<CsvOp, DatasetOp, std::shared_ptr<CsvOp>>; | |||
| // Converts the Python list to file names (None becomes "") and forwards to CsvOp::CountAllFileRows. | |||
| auto get_num_rows = [](const py::list &files, bool csv_header) { | |||
| std::vector<std::string> filenames; | |||
| for (auto file : files) { | |||
| if (file.is_none()) { | |||
| (void)filenames.emplace_back(""); | |||
| } else { | |||
| filenames.push_back(py::str(file)); | |||
| } | |||
| } | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(CsvOp::CountAllFileRows(filenames, csv_header, &num_rows)); | |||
| return num_rows; | |||
| }; | |||
| (void)CsvOpClass(*m, "CsvOp").def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds CocoOp through pybind11 and exposes the static "get_class_indexing" and "get_num_rows" helpers. | |||
| PYBIND_REGISTER(CocoOp, 1, ([](const py::module *m) { | |||
| using CocoOpClass = py::class_<CocoOp, DatasetOp, std::shared_ptr<CocoOp>>; | |||
| // Forwards to CocoOp::GetClassIndexing and returns the (name, ids) pairs it fills in. | |||
| auto get_class_indexing = [](const std::string &dir, const std::string &file, const std::string &task) { | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> class_indexing; | |||
| THROW_IF_ERROR(CocoOp::GetClassIndexing(dir, file, task, &class_indexing)); | |||
| return class_indexing; | |||
| }; | |||
| // Forwards to CocoOp::CountTotalRows and returns the count it reports. | |||
| auto get_num_rows = [](const std::string &dir, const std::string &file, const std::string &task) { | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(CocoOp::CountTotalRows(dir, file, task, &num_rows)); | |||
| return num_rows; | |||
| }; | |||
| (void)CocoOpClass(*m, "CocoOp") | |||
| .def_static("get_class_indexing", get_class_indexing) | |||
| .def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds ImageFolderOp through pybind11 and exposes the static "get_num_rows" and "get_num_classes" helpers. | |||
| PYBIND_REGISTER(ImageFolderOp, 1, ([](const py::module *m) { | |||
| (void)py::class_<ImageFolderOp, DatasetOp, std::shared_ptr<ImageFolderOp>>(*m, "ImageFolderOp") | |||
| // Only the row count is requested: the class-count output pointer is nullptr. | |||
| .def_static("get_num_rows", | |||
| [](const std::string &path) { | |||
| int64_t count = 0; | |||
| THROW_IF_ERROR(ImageFolderOp::CountRowsAndClasses(path, {}, &count, nullptr, {})); | |||
| return count; | |||
| }) | |||
| // Only the class count is requested: the row-count output pointer is nullptr. | |||
| // class_index is taken by const reference so the converted map is not copied again per call. | |||
| .def_static("get_num_classes", [](const std::string &path, | |||
| const std::map<std::string, int32_t> &class_index) { | |||
| int64_t num_classes = 0; | |||
| THROW_IF_ERROR(ImageFolderOp::CountRowsAndClasses(path, {}, nullptr, &num_classes, class_index)); | |||
| return num_classes; | |||
| }); | |||
| })); | |||
| // Binds ManifestOp through pybind11; no methods are exposed on it here. | |||
| PYBIND_REGISTER(ManifestOp, 1, ([](const py::module *m) { | |||
| using ManifestOpClass = py::class_<ManifestOp, DatasetOp, std::shared_ptr<ManifestOp>>; | |||
| (void)ManifestOpClass(*m, "ManifestOp"); | |||
| })); | |||
| // Binds MindRecordOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(MindRecordOp, 1, ([](const py::module *m) { | |||
| using MindRecordOpClass = py::class_<MindRecordOp, DatasetOp, std::shared_ptr<MindRecordOp>>; | |||
| // Forwards to MindRecordOp::CountTotalRows. The shard operator stays null unless the sampler object | |||
| // offers a "create_for_minddataset" attribute, in which case that attribute is called to build it. | |||
| auto get_num_rows = [](const std::vector<std::string> &paths, bool load_dataset, const py::object &sampler, | |||
| const int64_t num_padded) { | |||
| std::shared_ptr<mindrecord::ShardOperator> shard_op; | |||
| if (py::hasattr(sampler, "create_for_minddataset")) { | |||
| auto factory = sampler.attr("create_for_minddataset"); | |||
| shard_op = factory().cast<std::shared_ptr<mindrecord::ShardOperator>>(); | |||
| } | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(MindRecordOp::CountTotalRows(paths, load_dataset, shard_op, &num_rows, num_padded)); | |||
| return num_rows; | |||
| }; | |||
| (void)MindRecordOpClass(*m, "MindRecordOp").def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds MnistOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(MnistOp, 1, ([](const py::module *m) { | |||
| using MnistOpClass = py::class_<MnistOp, DatasetOp, std::shared_ptr<MnistOp>>; | |||
| // Forwards to MnistOp::CountTotalRows; a failing Status is handed to THROW_IF_ERROR. | |||
| auto get_num_rows = [](const std::string &dir, const std::string &usage) { | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(MnistOp::CountTotalRows(dir, usage, &num_rows)); | |||
| return num_rows; | |||
| }; | |||
| (void)MnistOpClass(*m, "MnistOp").def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds TextFileOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(TextFileOp, 1, ([](const py::module *m) { | |||
| using TextFileOpClass = py::class_<TextFileOp, DatasetOp, std::shared_ptr<TextFileOp>>; | |||
| // Converts the Python list to file names (None becomes "") and forwards to TextFileOp::CountAllFileRows. | |||
| auto get_num_rows = [](const py::list &files) { | |||
| std::vector<std::string> filenames; | |||
| for (auto file : files) { | |||
| if (file.is_none()) { | |||
| (void)filenames.emplace_back(""); | |||
| } else { | |||
| filenames.push_back(py::str(file)); | |||
| } | |||
| } | |||
| int64_t num_rows = 0; | |||
| THROW_IF_ERROR(TextFileOp::CountAllFileRows(filenames, &num_rows)); | |||
| return num_rows; | |||
| }; | |||
| (void)TextFileOpClass(*m, "TextFileOp").def_static("get_num_rows", get_num_rows); | |||
| })); | |||
| // Binds TFReaderOp through pybind11 and exposes the static "get_num_rows" helper. | |||
| PYBIND_REGISTER(TFReaderOp, 1, ([](const py::module *m) { | |||
| (void)py::class_<TFReaderOp, DatasetOp, std::shared_ptr<TFReaderOp>>(*m, "TFReaderOp") | |||
| // Converts the Python list to file names (None becomes "") and forwards to TFReaderOp::CountTotalRows. | |||
| // A C++ default on a lambda parameter is not visible to pybind11, so the default for "estimate" | |||
| // is declared with py::arg; positional calls with all three arguments keep working unchanged. | |||
| .def_static( | |||
| "get_num_rows", | |||
| [](const py::list &files, int64_t numParallelWorkers, bool estimate) { | |||
| int64_t count = 0; | |||
| std::vector<std::string> filenames; | |||
| for (auto l : files) { | |||
| !l.is_none() ? filenames.push_back(py::str(l)) : (void)filenames.emplace_back(""); | |||
| } | |||
| THROW_IF_ERROR(TFReaderOp::CountTotalRows(&count, filenames, numParallelWorkers, estimate)); | |||
| return count; | |||
| }, | |||
| py::arg("files"), py::arg("num_parallel_workers"), py::arg("estimate") = false); | |||
| })); | |||
| // Binds VOCOp through pybind11 and exposes the static "get_class_indexing" helper. | |||
| PYBIND_REGISTER(VOCOp, 1, ([](const py::module *m) { | |||
| using VOCOpClass = py::class_<VOCOp, DatasetOp, std::shared_ptr<VOCOp>>; | |||
| // Forwards to VOCOp::GetClassIndexing and returns the name-to-index map it fills in. | |||
| auto get_class_indexing = [](const std::string &dir, const std::string &task_type, | |||
| const std::string &task_mode, const py::dict &dict) { | |||
| std::map<std::string, int32_t> class_indexing; | |||
| THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, &class_indexing)); | |||
| return class_indexing; | |||
| }; | |||
| (void)VOCOpClass(*m, "VOCOp").def_static("get_class_indexing", get_class_indexing); | |||
| })); | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_AUDIO_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_AUDIO_H_ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -24,4 +24,4 @@ namespace audio {} // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_AUDIO_H_ | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_ | |||
| @@ -723,9 +723,9 @@ class RandomCropDecodeResize(ImageTensorOperation): | |||
| size (Union[int, sequence]): The size of the output image. | |||
| If size is an integer, a square crop of size (size, size) is returned. | |||
| If size is a sequence of length 2, it should be (height, width). | |||
| scale (tuple, optional): Range [min, max) of respective size of the | |||
| scale (list, tuple, optional): Range [min, max) of respective size of the | |||
| original size to be cropped (default=(0.08, 1.0)). | |||
| ratio (tuple, optional): Range [min, max) of aspect ratio to be | |||
| ratio (list, tuple, optional): Range [min, max) of aspect ratio to be | |||
| cropped (default=(3. / 4., 4. / 3.)). | |||
| interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). | |||
| It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. | |||
| @@ -918,9 +918,9 @@ class RandomResizedCrop(ImageTensorOperation): | |||
| size (Union[int, sequence]): The size of the output image. | |||
| If size is an integer, a square crop of size (size, size) is returned. | |||
| If size is a sequence of length 2, it should be (height, width). | |||
| scale (tuple, optional): Range [min, max) of respective size of the original | |||
| scale (list, tuple, optional): Range [min, max) of respective size of the original | |||
| size to be cropped (default=(0.08, 1.0)). | |||
| ratio (tuple, optional): Range [min, max) of aspect ratio to be cropped | |||
| ratio (list, tuple, optional): Range [min, max) of aspect ratio to be cropped | |||
| (default=(3. / 4., 4. / 3.)). | |||
| interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). | |||
| It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. | |||
| @@ -972,9 +972,9 @@ class RandomResizedCropWithBBox(ImageTensorOperation): | |||
| size (Union[int, sequence]): The size of the output image. | |||
| If size is an integer, a square crop of size (size, size) is returned. | |||
| If size is a sequence of length 2, it should be (height, width). | |||
| scale (tuple, optional): Range (min, max) of respective size of the original | |||
| scale (list, tuple, optional): Range (min, max) of respective size of the original | |||
| size to be cropped (default=(0.08, 1.0)). | |||
| ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped | |||
| ratio (list, tuple, optional): Range (min, max) of aspect ratio to be cropped | |||
| (default=(3. / 4., 4. / 3.)). | |||
| interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). | |||
| It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. | |||
| @@ -1394,9 +1394,9 @@ class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation): | |||
| size (Union[int, sequence]): The size of the output image. | |||
| If size is an integer, a square crop of size (size, size) is returned. | |||
| If size is a sequence of length 2, it should be (height, width). | |||
| scale (tuple, optional): Range [min, max) of respective size of the | |||
| scale (list, tuple, optional): Range [min, max) of respective size of the | |||
| original size to be cropped (default=(0.08, 1.0)). | |||
| ratio (tuple, optional): Range [min, max) of aspect ratio to be | |||
| ratio (list, tuple, optional): Range [min, max) of aspect ratio to be | |||
| cropped (default=(3. / 4., 4. / 3.)). | |||
| max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10). | |||
| If exceeded, fall back to use center_crop instead. | |||
| @@ -19,6 +19,7 @@ | |||
| #include "minddata/dataset/include/dataset/execute.h" | |||
| #include "minddata/dataset/include/dataset/transforms.h" | |||
| #include "minddata/dataset/include/dataset/vision.h" | |||
| #include "minddata/dataset/include/dataset/text.h" | |||
| #include "utils/log_adapter.h" | |||
| using namespace mindspore::dataset; | |||
| @@ -206,3 +207,41 @@ TEST_F(MindDataTestExecute, TestTransformDecodeResizeCenterCrop1) { | |||
| ASSERT_EQ(image.Shape()[1], 224); | |||
| ASSERT_EQ(image.Shape()[2], 224); | |||
| } | |||
| // Runs Decode and then UniformAugment (built from Resize, RandomVerticalFlip and RandomHorizontalFlip) | |||
| // through Execute and expects both calls to report success. | |||
| TEST_F(MindDataTestExecute, TestUniformAugment) { | |||
| // Load the test image from disk | |||
| auto image = ReadFileToTensor("data/dataset/apple.jpg"); | |||
| // Build the transforms | |||
| std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>(); | |||
| std::shared_ptr<TensorTransform> resize_op(new vision::Resize({16, 16})); | |||
| std::shared_ptr<TensorTransform> vertical_flip = std::make_shared<vision::RandomVerticalFlip>(); | |||
| std::shared_ptr<TensorTransform> horizontal_flip = std::make_shared<vision::RandomHorizontalFlip>(); | |||
| std::shared_ptr<TensorTransform> uniform_op( | |||
| new vision::UniformAugment({resize_op, vertical_flip, horizontal_flip}, 3)); | |||
| // Decode first; the result replaces the raw image | |||
| auto decode_exec = Execute({decode_op}); | |||
| Status decode_rc = decode_exec(image, &image); | |||
| ASSERT_TRUE(decode_rc.IsOk()); | |||
| // Then augment the decoded image, again in place | |||
| auto augment_exec = Execute({uniform_op}); | |||
| Status augment_rc = augment_exec(image, &image); | |||
| ASSERT_TRUE(augment_rc.IsOk()); | |||
| } | |||
| // Feeds a scalar string tensor to BasicTokenizer through the single-output Execute call and expects | |||
| // that call to fail. | |||
| TEST_F(MindDataTestExecute, TestBasicTokenizer) { | |||
| std::shared_ptr<Tensor> de_tensor; | |||
| // Check the fixture itself: if the input tensor could not be created, the expected failure below | |||
| // would be reported for the wrong reason. | |||
| Status create_rc = Tensor::CreateScalar<std::string>("Welcome to China.", &de_tensor); | |||
| ASSERT_TRUE(create_rc.IsOk()); | |||
| auto txt = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor)); | |||
| mindspore::MSTensor txt_result; | |||
| // Transform params | |||
| std::shared_ptr<TensorTransform> tokenizer = | |||
| std::make_shared<text::BasicTokenizer>(false, false, NormalizeForm::kNone, false, true); | |||
| // BasicTokenizer has 3 outputs but we only have 1 tensor to receive it, so it will raise an error | |||
| auto transform1 = Execute({tokenizer}); | |||
| Status rc = transform1(txt, &txt_result); | |||
| ASSERT_FALSE(rc.IsOk()); | |||
| MS_LOG(INFO) << rc; | |||
| } | |||