| @@ -21,6 +21,7 @@ | |||||
| #include "minddata/dataset/include/transforms.h" | #include "minddata/dataset/include/transforms.h" | ||||
| #include "minddata/dataset/engine/dataset_iterator.h" | #include "minddata/dataset/engine/dataset_iterator.h" | ||||
| // Source dataset headers (in alphabetical order) | // Source dataset headers (in alphabetical order) | ||||
| #include "minddata/dataset/engine/datasetops/source/celeba_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/cifar_op.h" | #include "minddata/dataset/engine/datasetops/source/cifar_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/coco_op.h" | #include "minddata/dataset/engine/datasetops/source/coco_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" | #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" | ||||
| @@ -91,6 +92,16 @@ Dataset::Dataset() { | |||||
| // FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS | // FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS | ||||
| // (In alphabetical order) | // (In alphabetical order) | ||||
| // Function to create a CelebADataset. | |||||
| std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type, | |||||
| const std::shared_ptr<SamplerObj> &sampler, const bool &decode, | |||||
| const std::set<std::string> &extensions) { | |||||
| auto ds = std::make_shared<CelebADataset>(dataset_dir, dataset_type, sampler, decode, extensions); | |||||
| // Call derived class validation method. | |||||
| return ds->ValidateParams() ? ds : nullptr; | |||||
| } | |||||
| // Function to create a Cifar10Dataset. | // Function to create a Cifar10Dataset. | ||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) { | std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) { | ||||
| auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, sampler); | auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, sampler); | ||||
| @@ -109,7 +120,8 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, std::s | |||||
| // Function to create a CocoDataset. | // Function to create a CocoDataset. | ||||
| std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file, | std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file, | ||||
| const std::string &task, bool decode, std::shared_ptr<SamplerObj> sampler) { | |||||
| const std::string &task, const bool &decode, | |||||
| const std::shared_ptr<SamplerObj> &sampler) { | |||||
| auto ds = std::make_shared<CocoDataset>(dataset_dir, annotation_file, task, decode, sampler); | auto ds = std::make_shared<CocoDataset>(dataset_dir, annotation_file, task, decode, sampler); | ||||
| // Call derived class validation method. | // Call derived class validation method. | ||||
| @@ -334,6 +346,53 @@ bool ValidateCommonDatasetParams(std::string dataset_dir) { | |||||
| // DERIVED DATASET CLASSES LEAF-NODE DATASETS | // DERIVED DATASET CLASSES LEAF-NODE DATASETS | ||||
| // (In alphabetical order) | // (In alphabetical order) | ||||
// Constructor for CelebADataset.
// Stores the user-supplied configuration verbatim; validation happens later in ValidateParams().
// NOTE(review): `const bool &decode` is an unidiomatic way to pass a bool (plain `bool` is cheaper
// and clearer), but changing it requires touching the matching header declaration as well.
CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
                             const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
                             const std::set<std::string> &extensions)
    : dataset_dir_(dataset_dir),
      dataset_type_(dataset_type),
      sampler_(sampler),
      decode_(decode),
      extensions_(extensions) {}
| bool CelebADataset::ValidateParams() { | |||||
| Path dir(dataset_dir_); | |||||
| if (!dir.IsDirectory()) { | |||||
| MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| std::set<std::string> dataset_type_list = {"all", "train", "valid", "test"}; | |||||
| auto iter = dataset_type_list.find(dataset_type_); | |||||
| if (iter == dataset_type_list.end()) { | |||||
| MS_LOG(ERROR) << "dataset_type should be one of 'all', 'train', 'valid' or 'test'."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
// Function to build CelebADataset.
// Translates this IR node into the runtime CelebAOp, wiring up the two-column
// ("image", "attr") schema that the op will populate.
std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  // If user does not specify Sampler, create a default sampler based on the shuffle variable.
  if (sampler_ == nullptr) {
    sampler_ = CreateDefaultSampler();
  }
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  // "image": rank-1 uint8 tensor holding the (possibly decoded) image data.
  RETURN_EMPTY_IF_ERROR(
    schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
  // "attr": rank-1 uint32 tensor of one-hot attribute flags, e.g. 0 1 0 0 1 ...
  RETURN_EMPTY_IF_ERROR(
    schema->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
  // NOTE(review): if sampler_->Build() returns by value, the outer std::move below is
  // redundant (already an rvalue) — confirm against SamplerObj::Build's signature.
  node_ops.push_back(std::make_shared<CelebAOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
                                                decode_, dataset_type_, extensions_, std::move(schema),
                                                std::move(sampler_->Build())));
  return node_ops;
}
// Constructor for Cifar10Dataset.
// Stores the dataset directory and sampler; validation is deferred to ValidateParams().
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
    : dataset_dir_(dataset_dir), sampler_(sampler) {}
| @@ -396,7 +455,7 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() { | |||||
// Constructor for CocoDataset.
// Stores the annotation file, task name, decode flag and sampler; validation is deferred
// to ValidateParams().
CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
                         const bool &decode, const std::shared_ptr<SamplerObj> &sampler)
    : dataset_dir_(dataset_dir), annotation_file_(annotation_file), task_(task), decode_(decode), sampler_(sampler) {}
| bool CocoDataset::ValidateParams() { | bool CocoDataset::ValidateParams() { | ||||
| @@ -41,6 +41,7 @@ namespace api { | |||||
| class TensorOperation; | class TensorOperation; | ||||
| class SamplerObj; | class SamplerObj; | ||||
| // Datasets classes (in alphabetical order) | // Datasets classes (in alphabetical order) | ||||
| class CelebADataset; | |||||
| class Cifar10Dataset; | class Cifar10Dataset; | ||||
| class Cifar100Dataset; | class Cifar100Dataset; | ||||
| class CocoDataset; | class CocoDataset; | ||||
| @@ -59,6 +60,20 @@ class SkipDataset; | |||||
| class TakeDataset; | class TakeDataset; | ||||
| class ZipDataset; | class ZipDataset; | ||||
| /// \brief Function to create a CelebADataset | |||||
| /// \notes The generated dataset has two columns ['image', 'attr']. | |||||
| // The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||||
| /// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'. | |||||
| /// \param[in] decode Decode the images after reading (default=False). | |||||
| /// \param[in] extensions List of file extensions to be included in the dataset (default=None). | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||||
| /// will be used to randomly iterate the entire dataset | |||||
| /// \return Shared pointer to the current Dataset | |||||
| std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all", | |||||
| const std::shared_ptr<SamplerObj> &sampler = nullptr, const bool &decode = false, | |||||
| const std::set<std::string> &extensions = {}); | |||||
| /// \brief Function to create a Cifar10 Dataset | /// \brief Function to create a Cifar10 Dataset | ||||
| /// \notes The generated dataset has two columns ['image', 'label'] | /// \notes The generated dataset has two columns ['image', 'label'] | ||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | /// \param[in] dataset_dir Path to the root directory that contains the dataset | ||||
| @@ -93,8 +108,8 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, | |||||
| /// will be used to randomly iterate the entire dataset | /// will be used to randomly iterate the entire dataset | ||||
| /// \return Shared pointer to the current Dataset | /// \return Shared pointer to the current Dataset | ||||
| std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file, | std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file, | ||||
| const std::string &task = "Detection", bool decode = false, | |||||
| std::shared_ptr<SamplerObj> sampler = nullptr); | |||||
| const std::string &task = "Detection", const bool &decode = false, | |||||
| const std::shared_ptr<SamplerObj> &sampler = nullptr); | |||||
| /// \brief Function to create an ImageFolderDataset | /// \brief Function to create an ImageFolderDataset | ||||
| /// \notes A source dataset that reads images from a tree of directories | /// \notes A source dataset that reads images from a tree of directories | ||||
| @@ -277,6 +292,32 @@ class Dataset : public std::enable_shared_from_this<Dataset> { | |||||
| /* ####################################### Derived Dataset classes ################################# */ | /* ####################################### Derived Dataset classes ################################# */ | ||||
/// \brief IR node for the CelebA dataset: holds user configuration and builds the runtime CelebAOp.
class CelebADataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset.
  /// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
  /// \param[in] sampler Object used to choose samples; nullptr selects a default sampler at Build() time.
  /// \param[in] decode Whether to decode the images after reading.
  /// \param[in] extensions Set of file extensions to be included in the dataset.
  CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
                const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
                const std::set<std::string> &extensions);

  /// \brief Destructor
  ~CelebADataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::vector<std::shared_ptr<DatasetOp>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;              // root directory of the CelebA data
  std::string dataset_type_;             // usage split: 'all', 'train', 'valid' or 'test'
  bool decode_;                          // whether images are decoded on read
  std::set<std::string> extensions_;     // file-extension filter; empty means accept all
  std::shared_ptr<SamplerObj> sampler_;  // user sampler; nullptr means default sampler at Build() time
};
| class Cifar10Dataset : public Dataset { | class Cifar10Dataset : public Dataset { | ||||
| public: | public: | ||||
| /// \brief Constructor | /// \brief Constructor | ||||
| @@ -322,8 +363,8 @@ class Cifar100Dataset : public Dataset { | |||||
| class CocoDataset : public Dataset { | class CocoDataset : public Dataset { | ||||
| public: | public: | ||||
| /// \brief Constructor | /// \brief Constructor | ||||
| CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, bool decode, | |||||
| std::shared_ptr<SamplerObj> sampler); | |||||
| CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, | |||||
| const bool &decode, const std::shared_ptr<SamplerObj> &sampler); | |||||
| /// \brief Destructor | /// \brief Destructor | ||||
| ~CocoDataset() = default; | ~CocoDataset() = default; | ||||
| @@ -1639,7 +1639,7 @@ TEST_F(MindDataTestPipeline, TestCocoPanoptic) { | |||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestCocoDefault) { | TEST_F(MindDataTestPipeline, TestCocoDefault) { | ||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection."; | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDefault."; | |||||
| // Create a Coco Dataset | // Create a Coco Dataset | ||||
| std::string folder_path = datasets_root_path_ + "/testCOCO/train"; | std::string folder_path = datasets_root_path_ + "/testCOCO/train"; | ||||
| std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json"; | std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json"; | ||||
| @@ -1675,7 +1675,7 @@ TEST_F(MindDataTestPipeline, TestCocoDefault) { | |||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestCocoException) { | TEST_F(MindDataTestPipeline, TestCocoException) { | ||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection."; | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoException."; | |||||
| // Create a Coco Dataset | // Create a Coco Dataset | ||||
| std::string folder_path = datasets_root_path_ + "/testCOCO/train"; | std::string folder_path = datasets_root_path_ + "/testCOCO/train"; | ||||
| std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json"; | std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json"; | ||||
| @@ -1841,3 +1841,97 @@ TEST_F(MindDataTestPipeline, TestConcatFail2) { | |||||
| ds = ds->Concat({}); | ds = ds->Concat({}); | ||||
| EXPECT_EQ(ds, nullptr); | EXPECT_EQ(ds, nullptr); | ||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestCelebADataset) { | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADataset."; | |||||
| // Create a CelebA Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testCelebAData/"; | |||||
| std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", SequentialSampler(0, 2), false, {}); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_NE(iter, nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| // Check if CelebAOp read correct images/attr | |||||
| std::string expect_file[] = {"1.JPEG", "2.jpg"}; | |||||
| std::vector<std::vector<uint32_t>> expect_attr_vector = | |||||
| {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, | |||||
| 1, 0, 0, 1}, {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, | |||||
| 1, 0, 0, 0, 0, 0, 0, 0, 1}}; | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| auto image = row["image"]; | |||||
| auto attr = row["attr"]; | |||||
| std::shared_ptr<Tensor> expect_image; | |||||
| Tensor::CreateFromFile(folder_path + expect_file[i], &expect_image); | |||||
| EXPECT_EQ(*image, *expect_image); | |||||
| std::shared_ptr<Tensor> expect_attr; | |||||
| Tensor::CreateFromVector(expect_attr_vector[i], TensorShape({40}), &expect_attr); | |||||
| EXPECT_EQ(*attr, *expect_attr); | |||||
| iter->GetNextRow(&row); | |||||
| i++; | |||||
| } | |||||
| EXPECT_EQ(i, 2); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestCelebADefault) { | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADefault."; | |||||
| // Create a CelebA Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testCelebAData/"; | |||||
| std::shared_ptr<Dataset> ds = CelebA(folder_path); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_NE(iter, nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| // Check if CelebAOp read correct images/attr | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| auto image = row["image"]; | |||||
| auto attr = row["attr"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| MS_LOG(INFO) << "Tensor attr shape: " << attr->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| i++; | |||||
| } | |||||
| EXPECT_EQ(i, 2); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestCelebAException) { | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebAException."; | |||||
| // Create a CelebA Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testCelebAData/"; | |||||
| std::string invalid_folder_path = "./testNotExist"; | |||||
| std::string invalid_dataset_type = "invalid_type"; | |||||
| std::shared_ptr<Dataset> ds = CelebA(invalid_folder_path); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| std::shared_ptr<Dataset> ds1 = CelebA(folder_path, invalid_dataset_type); | |||||
| EXPECT_EQ(ds1, nullptr); | |||||
| } | |||||