From 6276c0f22f889e7ba1008976236b4564d8575985 Mon Sep 17 00:00:00 2001
From: xiefangqi
Date: Tue, 4 Aug 2020 16:00:37 +0800
Subject: [PATCH] new c++ api ==> CelebA

---
 .../ccsrc/minddata/dataset/api/datasets.cc    | 63 +++++++++++-
 .../ccsrc/minddata/dataset/include/datasets.h | 49 +++++++++-
 tests/ut/cpp/dataset/c_api_test.cc            | 98 ++++++++++++++++++-
 3 files changed, 202 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
index 99e657d93d..0e6090f128 100644
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@@ -21,6 +21,7 @@
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/engine/dataset_iterator.h"
 // Source dataset headers (in alphabetical order)
+#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
 #include "minddata/dataset/engine/datasetops/source/cifar_op.h"
 #include "minddata/dataset/engine/datasetops/source/coco_op.h"
 #include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
@@ -91,6 +92,16 @@ Dataset::Dataset() {
 // FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS
 // (In alphabetical order)
 
+// Function to create a CelebADataset.
+std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type,
+                                      const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
+                                      const std::set<std::string> &extensions) {
+  auto ds = std::make_shared<CelebADataset>(dataset_dir, dataset_type, sampler, decode, extensions);
+
+  // Call derived class validation method.
+  return ds->ValidateParams() ? ds : nullptr;
+}
+
 // Function to create a Cifar10Dataset.
 std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) {
   auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, sampler);
@@ -109,7 +120,8 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, std::s
 
 // Function to create a CocoDataset.
 std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
-                                  const std::string &task, bool decode, std::shared_ptr<SamplerObj> sampler) {
+                                  const std::string &task, const bool &decode,
+                                  const std::shared_ptr<SamplerObj> &sampler) {
   auto ds = std::make_shared<CocoDataset>(dataset_dir, annotation_file, task, decode, sampler);
 
   // Call derived class validation method.
@@ -334,6 +346,53 @@ bool ValidateCommonDatasetParams(std::string dataset_dir) {
 // DERIVED DATASET CLASSES LEAF-NODE DATASETS
 // (In alphabetical order)
 
+// Constructor for CelebADataset
+CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
+                             const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
+                             const std::set<std::string> &extensions)
+    : dataset_dir_(dataset_dir),
+      dataset_type_(dataset_type),
+      sampler_(sampler),
+      decode_(decode),
+      extensions_(extensions) {}
+
+bool CelebADataset::ValidateParams() {
+  Path dir(dataset_dir_);
+  if (!dir.IsDirectory()) {
+    MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified.";
+    return false;
+  }
+  std::set<std::string> dataset_type_list = {"all", "train", "valid", "test"};
+  auto iter = dataset_type_list.find(dataset_type_);
+  if (iter == dataset_type_list.end()) {
+    MS_LOG(ERROR) << "dataset_type should be one of 'all', 'train', 'valid' or 'test'.";
+    return false;
+  }
+  return true;
+}
+
+// Function to build CelebADataset
+std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
+  // A vector containing shared pointer to the Dataset Ops that this object will create
+  std::vector<std::shared_ptr<DatasetOp>> node_ops;
+
+  // If user does not specify Sampler, create a default sampler based on the shuffle variable.
+  if (sampler_ == nullptr) {
+    sampler_ = CreateDefaultSampler();
+  }
+
+  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
+  RETURN_EMPTY_IF_ERROR(
+    schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+  // The attr column is a vector of 0/1 labels, e.g. 0 1 0 0 1 ...
+  RETURN_EMPTY_IF_ERROR(
+    schema->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
+  node_ops.push_back(std::make_shared<CelebAOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
+                                                decode_, dataset_type_, extensions_, std::move(schema),
+                                                std::move(sampler_->Build())));
+  return node_ops;
+}
+
 // Constructor for Cifar10Dataset
 Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
     : dataset_dir_(dataset_dir), sampler_(sampler) {}
@@ -396,7 +455,7 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() {
 
 // Constructor for CocoDataset
 CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
-                         bool decode, std::shared_ptr<SamplerObj> sampler)
+                         const bool &decode, const std::shared_ptr<SamplerObj> &sampler)
     : dataset_dir_(dataset_dir), annotation_file_(annotation_file), task_(task), decode_(decode), sampler_(sampler) {}
 
 bool CocoDataset::ValidateParams() {
diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h
index ba6ae5ff1b..eefcb024f6 100644
--- a/mindspore/ccsrc/minddata/dataset/include/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h
@@ -41,6 +41,7 @@ namespace api {
 class TensorOperation;
 class SamplerObj;
 // Datasets classes (in alphabetical order)
+class CelebADataset;
 class Cifar10Dataset;
 class Cifar100Dataset;
 class CocoDataset;
@@ -59,6 +60,20 @@ class SkipDataset;
 class TakeDataset;
 class ZipDataset;
 
+/// \brief Function to create a CelebADataset
+/// \notes The generated dataset has two columns ['image', 'attr'].
+///     The type of the image tensor is uint8. The attr tensor is uint32 and holds the 40 binary attribute labels.
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test' (default='all').
+/// \param[in] decode Decode the images after reading (default=false).
+/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
+/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
+///     will be used to randomly iterate the entire dataset.
+/// \return Shared pointer to the current Dataset
+std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all",
+                                      const std::shared_ptr<SamplerObj> &sampler = nullptr, const bool &decode = false,
+                                      const std::set<std::string> &extensions = {});
+
 /// \brief Function to create a Cifar10 Dataset
 /// \notes The generated dataset has two columns ['image', 'label']
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
@@ -93,8 +108,8 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
 ///     will be used to randomly iterate the entire dataset
 /// \return Shared pointer to the current Dataset
 std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
-                                  const std::string &task = "Detection", bool decode = false,
-                                  std::shared_ptr<SamplerObj> sampler = nullptr);
+                                  const std::string &task = "Detection", const bool &decode = false,
+                                  const std::shared_ptr<SamplerObj> &sampler = nullptr);
 
 /// \brief Function to create an ImageFolderDataset
 /// \notes A source dataset that reads images from a tree of directories
@@ -277,6 +292,32 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
 
 /* ####################################### Derived Dataset classes ################################# */
 
+class CelebADataset : public Dataset {
+ public:
+  /// \brief Constructor
+  CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
+                const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
+                const std::set<std::string> &extensions);
+
+  /// \brief Destructor
+  ~CelebADataset() = default;
+
+  /// \brief A base class override function to create the required runtime dataset op objects for this class
+  /// \return shared pointer to the list of newly created DatasetOps
+  std::vector<std::shared_ptr<DatasetOp>> Build() override;
+
+  /// \brief Parameters validation
+  /// \return bool true if all the params are valid
+  bool ValidateParams() override;
+
+ private:
+  std::string dataset_dir_;
+  std::string dataset_type_;
+  bool decode_;
+  std::set<std::string> extensions_;
+  std::shared_ptr<SamplerObj> sampler_;
+};
+
 class Cifar10Dataset : public Dataset {
  public:
   /// \brief Constructor
@@ -322,8 +363,8 @@ class Cifar100Dataset : public Dataset {
 class CocoDataset : public Dataset {
  public:
   /// \brief Constructor
-  CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, bool decode,
-              std::shared_ptr<SamplerObj> sampler);
+  CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
+              const bool &decode, const std::shared_ptr<SamplerObj> &sampler);
 
   /// \brief Destructor
   ~CocoDataset() = default;
diff --git a/tests/ut/cpp/dataset/c_api_test.cc b/tests/ut/cpp/dataset/c_api_test.cc
index 4e5db80fe3..11e84f7de4 100644
--- a/tests/ut/cpp/dataset/c_api_test.cc
+++ b/tests/ut/cpp/dataset/c_api_test.cc
@@ -1639,7 +1639,7 @@ TEST_F(MindDataTestPipeline, TestCocoPanoptic) {
 }
 
 TEST_F(MindDataTestPipeline, TestCocoDefault) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection.";
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDefault.";
   // Create a Coco Dataset
   std::string folder_path = datasets_root_path_ + "/testCOCO/train";
   std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";
@@ -1675,7 +1675,7 @@ TEST_F(MindDataTestPipeline, TestCocoDefault) {
 }
 
 TEST_F(MindDataTestPipeline, TestCocoException) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection.";
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoException.";
   // Create a Coco Dataset
   std::string folder_path = datasets_root_path_ + "/testCOCO/train";
   std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";
@@ -1841,3 +1841,97 @@ TEST_F(MindDataTestPipeline, TestConcatFail2) {
   ds = ds->Concat({});
   EXPECT_EQ(ds, nullptr);
 }
+
+TEST_F(MindDataTestPipeline, TestCelebADataset) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADataset.";
+
+  // Create a CelebA Dataset
+  std::string folder_path = datasets_root_path_ + "/testCelebAData/";
+  std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", SequentialSampler(0, 2), false, {});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  // Check if CelebAOp read correct images/attr
+  std::string expect_file[] = {"1.JPEG", "2.jpg"};
+  std::vector<std::vector<uint32_t>> expect_attr_vector =
+    {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
+      1, 0, 0, 1},
+     {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+      1, 0, 0, 0, 0, 0, 0, 0, 1}};
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto image = row["image"];
+    auto attr = row["attr"];
+
+    std::shared_ptr<Tensor> expect_image;
+    Tensor::CreateFromFile(folder_path + expect_file[i], &expect_image);
+    EXPECT_EQ(*image, *expect_image);
+
+    std::shared_ptr<Tensor> expect_attr;
+    Tensor::CreateFromVector(expect_attr_vector[i], TensorShape({40}), &expect_attr);
+    EXPECT_EQ(*attr, *expect_attr);
+
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 2);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestCelebADefault) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADefault.";
+
+  // Create a CelebA Dataset
+  std::string folder_path = datasets_root_path_ + "/testCelebAData/";
+  std::shared_ptr<Dataset> ds = CelebA(folder_path);
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  // Check if CelebAOp read correct images/attr
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto image = row["image"];
+    auto attr = row["attr"];
+    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
+    MS_LOG(INFO) << "Tensor attr shape: " << attr->shape();
+
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 2);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestCelebAException) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebAException.";
+
+  // Create a CelebA Dataset
+  std::string folder_path = datasets_root_path_ + "/testCelebAData/";
+  std::string invalid_folder_path = "./testNotExist";
+  std::string invalid_dataset_type = "invalid_type";
+  std::shared_ptr<Dataset> ds = CelebA(invalid_folder_path);
+  EXPECT_EQ(ds, nullptr);
+  std::shared_ptr<Dataset> ds1 = CelebA(folder_path, invalid_dataset_type);
+  EXPECT_EQ(ds1, nullptr);
+}
\ No newline at end of file
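
Usage sketch for reviewers: a minimal sketch of how the CelebA C++ API added in this patch could be consumed outside the unit tests. The include path, the `mindspore::dataset::api` namespace qualification, the `main()` wrapper, and the "/path/to/celeba" directory are assumptions for illustration only; the row-iteration pattern mirrors the tests above.

#include <memory>
#include <string>
#include <unordered_map>

#include "minddata/dataset/include/datasets.h"  // assumed public include path

using mindspore::dataset::Tensor;               // assumed, mirroring c_api_test.cc
using namespace mindspore::dataset::api;        // assumed, mirroring c_api_test.cc

int main() {
  // "/path/to/celeba" is a placeholder for a CelebA-style directory.
  std::shared_ptr<Dataset> ds = CelebA("/path/to/celeba", "all", /*sampler=*/nullptr, /*decode=*/true);
  if (ds == nullptr) {
    return 1;  // ValidateParams() rejected the path or the dataset_type.
  }

  // Build and launch the execution tree, then pull rows of ['image', 'attr'].
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  if (iter == nullptr) {
    return 1;
  }

  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);
  while (!row.empty()) {
    std::shared_ptr<Tensor> image = row["image"];  // uint8 image tensor
    std::shared_ptr<Tensor> attr = row["attr"];    // uint32 vector of 40 binary attributes
    iter->GetNextRow(&row);
  }

  // Manually terminate the pipeline, as the tests do.
  iter->Stop();
  return 0;
}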