| @@ -86,9 +86,16 @@ Dataset::Dataset() { | |||||
| // (In alphabetical order) | // (In alphabetical order) | ||||
| // Function to create a Cifar10Dataset. | // Function to create a Cifar10Dataset. | ||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples, | |||||
| std::shared_ptr<SamplerObj> sampler) { | |||||
| auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, num_samples, sampler); | |||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) { | |||||
| // Build the dataset node first so that its own ValidateParams() override can be consulted. | |||||
| std::shared_ptr<Cifar10Dataset> dataset_node = std::make_shared<Cifar10Dataset>(dataset_dir, sampler); | |||||
| if (!dataset_node->ValidateParams()) { | |||||
| // Invalid parameters: report the failure to the caller as a null pointer. | |||||
| return nullptr; | |||||
| } | |||||
| return dataset_node; | |||||
| } | |||||
| // Function to create a Cifar100Dataset. | |||||
| std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) { | |||||
| auto ds = std::make_shared<Cifar100Dataset>(dataset_dir, sampler); | |||||
| // Call derived class validation method. | // Call derived class validation method. | ||||
| return ds->ValidateParams() ? ds : nullptr; | return ds->ValidateParams() ? ds : nullptr; | ||||
| @@ -250,28 +257,27 @@ std::shared_ptr<SamplerObj> CreateDefaultSampler() { | |||||
| return std::make_shared<RandomSamplerObj>(replacement, num_samples); | return std::make_shared<RandomSamplerObj>(replacement, num_samples); | ||||
| } | } | ||||
| // Helper function to validate the parameters shared by the leaf-node datasets. | |||||
| // \param[in] dataset_dir Dataset path supplied by the caller; taken by const reference so the check does not copy it. | |||||
| // \return true if a dataset path was given, false (after logging an error) if it is empty. | |||||
| bool ValidateCommonDatasetParams(const std::string &dataset_dir) { | |||||
| if (dataset_dir.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| /* ####################################### Derived Dataset classes ################################# */ | /* ####################################### Derived Dataset classes ################################# */ | ||||
| // DERIVED DATASET CLASSES LEAF-NODE DATASETS | // DERIVED DATASET CLASSES LEAF-NODE DATASETS | ||||
| // (In alphabetical order) | // (In alphabetical order) | ||||
| // Constructor for Cifar10Dataset | // Constructor for Cifar10Dataset | ||||
| Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler) | |||||
| : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {} | |||||
| // Stores the dataset directory and the sampler. The sampler handle is a by-value sink parameter, | |||||
| // so it is moved into the member rather than copied a second time. | |||||
| Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) | |||||
| : dataset_dir_(dataset_dir), sampler_(std::move(sampler)) {} | |||||
| bool Cifar10Dataset::ValidateParams() { | |||||
| if (dataset_dir_.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "Number of samples cannot be negative"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Cifar10Dataset::ValidateParams() { | |||||
| // Only the dataset directory is checked; the result comes straight from the common helper. | |||||
| const bool params_ok = ValidateCommonDatasetParams(dataset_dir_); | |||||
| return params_ok; | |||||
| } | |||||
| // Function to build CifarOp | |||||
| // Function to build CifarOp for Cifar10 | |||||
| std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() { | std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() { | ||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | // A vector containing shared pointer to the Dataset Ops that this object will create | ||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | std::vector<std::shared_ptr<DatasetOp>> node_ops; | ||||
| @@ -294,6 +300,37 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() { | |||||
| return node_ops; | return node_ops; | ||||
| } | } | ||||
| // Constructor for Cifar100Dataset: stores the dataset directory and the sampler. | |||||
| // The sampler handle is a by-value sink parameter, so it is moved into the member rather than copied a second time. | |||||
| Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) | |||||
| : dataset_dir_(dataset_dir), sampler_(std::move(sampler)) {} | |||||
| bool Cifar100Dataset::ValidateParams() { | |||||
| // Only the dataset directory is checked; the result comes straight from the common helper. | |||||
| const bool params_ok = ValidateCommonDatasetParams(dataset_dir_); | |||||
| return params_ok; | |||||
| } | |||||
| // Function to build the CifarOp (constructed with CifarOp::CifarType::kCifar100) for this dataset | |||||
| std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() { | |||||
| // Vector of shared pointers to the DatasetOps that this object creates (a single CifarOp is pushed below) | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // If the user did not specify a sampler, fall back to the one returned by CreateDefaultSampler(). | |||||
| if (sampler_ == nullptr) { | |||||
| sampler_ = CreateDefaultSampler(); | |||||
| } | |||||
| // Generate the internal schema: an "image" column plus scalar-shaped "coarse_label" and "fine_label" columns. | |||||
| auto schema = std::make_unique<DataSchema>(); | |||||
| RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||||
| TensorShape scalar = TensorShape::CreateScalar(); | |||||
| RETURN_EMPTY_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("coarse_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| RETURN_EMPTY_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("fine_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| node_ops.push_back(std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, num_workers_, rows_per_buffer_, | |||||
| dataset_dir_, connector_que_size_, std::move(schema), | |||||
| std::move(sampler_->Build()))); | |||||
| return node_ops; | |||||
| } | |||||
| ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, | ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, | ||||
| bool recursive, std::set<std::string> extensions, | bool recursive, std::set<std::string> extensions, | ||||
| std::map<std::string, int32_t> class_indexing) | std::map<std::string, int32_t> class_indexing) | ||||
| @@ -304,14 +341,7 @@ ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std | |||||
| class_indexing_(class_indexing), | class_indexing_(class_indexing), | ||||
| exts_(extensions) {} | exts_(extensions) {} | ||||
| bool ImageFolderDataset::ValidateParams() { | |||||
| if (dataset_dir_.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool ImageFolderDataset::ValidateParams() { | |||||
| // Only the dataset directory is checked; the result comes straight from the common helper. | |||||
| const bool params_ok = ValidateCommonDatasetParams(dataset_dir_); | |||||
| return params_ok; | |||||
| } | |||||
| std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() { | std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() { | ||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | // A vector containing shared pointer to the Dataset Ops that this object will create | ||||
| @@ -339,14 +369,7 @@ std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() { | |||||
| MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) | MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) | ||||
| : dataset_dir_(dataset_dir), sampler_(sampler) {} | : dataset_dir_(dataset_dir), sampler_(sampler) {} | ||||
| bool MnistDataset::ValidateParams() { | |||||
| if (dataset_dir_.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool MnistDataset::ValidateParams() { | |||||
| // Only the dataset directory is checked; the result comes straight from the common helper. | |||||
| const bool params_ok = ValidateCommonDatasetParams(dataset_dir_); | |||||
| return params_ok; | |||||
| } | |||||
| std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() { | std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() { | ||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | // A vector containing shared pointer to the Dataset Ops that this object will create | ||||
| @@ -42,6 +42,7 @@ class TensorOperation; | |||||
| class SamplerObj; | class SamplerObj; | ||||
| // Datasets classes (in alphabetical order) | // Datasets classes (in alphabetical order) | ||||
| class Cifar10Dataset; | class Cifar10Dataset; | ||||
| class Cifar100Dataset; | |||||
| class ImageFolderDataset; | class ImageFolderDataset; | ||||
| class MnistDataset; | class MnistDataset; | ||||
| // Dataset Op classes (in alphabetical order) | // Dataset Op classes (in alphabetical order) | ||||
| @@ -57,12 +58,19 @@ class ZipDataset; | |||||
| /// \brief Function to create a Cifar10 Dataset | /// \brief Function to create a Cifar10 Dataset | ||||
| /// \notes The generated dataset has two columns ['image', 'label'] | /// \notes The generated dataset has two columns ['image', 'label'] | ||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | /// \param[in] dataset_dir Path to the root directory that contains the dataset | ||||
| /// \param[in] num_samples The number of images to be included in the dataset | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler` | /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler` | ||||
| /// will be used to randomly iterate the entire dataset | /// will be used to randomly iterate the entire dataset | ||||
| /// \return Shared pointer to the current Dataset | /// \return Shared pointer to the current Dataset | ||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples, | |||||
| std::shared_ptr<SamplerObj> sampler); | |||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr); | |||||
| /// \brief Function to create a Cifar100 Dataset | |||||
| /// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label'] | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler` | |||||
| /// will be used to randomly iterate the entire dataset | |||||
| /// \return Shared pointer to the current Dataset | |||||
| std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, | |||||
| std::shared_ptr<SamplerObj> sampler = nullptr); | |||||
| /// \brief Function to create an ImageFolderDataset | /// \brief Function to create an ImageFolderDataset | ||||
| /// \notes A source dataset that reads images from a tree of directories | /// \notes A source dataset that reads images from a tree of directories | ||||
| @@ -204,7 +212,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> { | |||||
| class Cifar10Dataset : public Dataset { | class Cifar10Dataset : public Dataset { | ||||
| public: | public: | ||||
| /// \brief Constructor | /// \brief Constructor | ||||
| Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler); | |||||
| Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler); | |||||
| /// \brief Destructor | /// \brief Destructor | ||||
| ~Cifar10Dataset() = default; | ~Cifar10Dataset() = default; | ||||
| @@ -219,7 +227,27 @@ class Cifar10Dataset : public Dataset { | |||||
| private: | private: | ||||
| std::string dataset_dir_; | std::string dataset_dir_; | ||||
| int32_t num_samples_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | |||||
| }; | |||||
| class Cifar100Dataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor taking the dataset directory and the sampler to use (a null sampler is replaced in Build()) | |||||
| Cifar100Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler); | |||||
| /// \brief Destructor (compiler-generated default) | |||||
| ~Cifar100Dataset() = default; | |||||
| /// \brief A base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return The list of shared pointers to the newly created DatasetOps | |||||
| std::vector<std::shared_ptr<DatasetOp>> Build() override; | |||||
| /// \brief Parameters validation; checks that the dataset directory is not empty | |||||
| /// \return bool true if all the params are valid, false otherwise | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::string dataset_dir_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | std::shared_ptr<SamplerObj> sampler_; | ||||
| }; | }; | ||||
| @@ -84,6 +84,12 @@ TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { | |||||
| iter->Stop(); | iter->Stop(); | ||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestMnistFail1) { | |||||
| // Create a Mnist Dataset with an empty dataset path; creation is expected to fail and yield nullptr | |||||
| std::shared_ptr<Dataset> ds = Mnist("", RandomSampler(false, 10)); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { | TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { | ||||
| // Create a Mnist Dataset | // Create a Mnist Dataset | ||||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | std::string folder_path = datasets_root_path_ + "/testMnistData/"; | ||||
| @@ -274,6 +280,12 @@ TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { | |||||
| iter->Stop(); | iter->Stop(); | ||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestImageFolderFail1) { | |||||
| // Create an ImageFolder Dataset with an empty dataset path; creation is expected to fail and yield nullptr | |||||
| std::shared_ptr<Dataset> ds = ImageFolder("", true, nullptr); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { | TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { | ||||
| std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1); | std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1); | ||||
| EXPECT_NE(sampl, nullptr); | EXPECT_NE(sampl, nullptr); | ||||
| @@ -630,17 +642,7 @@ TEST_F(MindDataTestPipeline, TestCifar10Dataset) { | |||||
| // Create a Cifar10 Dataset | // Create a Cifar10 Dataset | ||||
| std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; | std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; | ||||
| std::shared_ptr<Dataset> ds = Cifar10(folder_path, 0, RandomSampler(false, 10)); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 2; | |||||
| ds = ds->Batch(batch_size); | |||||
| std::shared_ptr<Dataset> ds = Cifar10(folder_path, RandomSampler(false, 10)); | |||||
| EXPECT_NE(ds, nullptr); | EXPECT_NE(ds, nullptr); | ||||
| // Create an iterator over the result of the above dataset | // Create an iterator over the result of the above dataset | ||||
| @@ -652,6 +654,9 @@ TEST_F(MindDataTestPipeline, TestCifar10Dataset) { | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | ||||
| iter->GetNextRow(&row); | iter->GetNextRow(&row); | ||||
| EXPECT_NE(row.find("image"), row.end()); | |||||
| EXPECT_NE(row.find("label"), row.end()); | |||||
| uint64_t i = 0; | uint64_t i = 0; | ||||
| while (row.size() != 0) { | while (row.size() != 0) { | ||||
| i++; | i++; | ||||
| @@ -666,6 +671,54 @@ TEST_F(MindDataTestPipeline, TestCifar10Dataset) { | |||||
| iter->Stop(); | iter->Stop(); | ||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) { | |||||
| // Create a Cifar10 Dataset with an empty dataset path; creation is expected to fail and yield nullptr | |||||
| std::shared_ptr<Dataset> ds = Cifar10("", RandomSampler(false, 10)); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestCifar100Dataset) { | |||||
| // Create a Cifar100 Dataset from the testCifar100Data folder, sampled with RandomSampler(false, 10) | |||||
| std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; | |||||
| std::shared_ptr<Dataset> ds = Cifar100(folder_path, RandomSampler(false, 10)); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_NE(iter, nullptr); | |||||
| // Fetch the first row, check the three expected columns are present, then count rows until an empty row comes back | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| EXPECT_NE(row.find("image"), row.end()); | |||||
| EXPECT_NE(row.find("coarse_label"), row.end()); | |||||
| EXPECT_NE(row.find("fine_label"), row.end()); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_EQ(i, 10); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestCifar100DatasetFail1) { | |||||
| // Create a Cifar100 Dataset with an empty dataset path; creation is expected to fail and yield nullptr | |||||
| std::shared_ptr<Dataset> ds = Cifar100("", RandomSampler(false, 10)); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { | TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { | ||||
| // Create an ImageFolder Dataset | // Create an ImageFolder Dataset | ||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | std::string folder_path = datasets_root_path_ + "/testPK/data/"; | ||||
| @@ -843,7 +896,7 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) { | |||||
| EXPECT_NE(ds1, nullptr); | EXPECT_NE(ds1, nullptr); | ||||
| folder_path = datasets_root_path_ + "/testCifar10Data/"; | folder_path = datasets_root_path_ + "/testCifar10Data/"; | ||||
| std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, 0, RandomSampler(false, 10)); | |||||
| std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, RandomSampler(false, 10)); | |||||
| EXPECT_NE(ds2, nullptr); | EXPECT_NE(ds2, nullptr); | ||||
| // Create a Project operation on ds | // Create a Project operation on ds | ||||