diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index b02de78d74..e12906499a 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -27,6 +27,7 @@ #include "minddata/dataset/engine/datasetops/source/voc_op.h" // Dataset operator headers (in alphabetical order) #include "minddata/dataset/engine/datasetops/batch_op.h" +#include "minddata/dataset/engine/datasetops/concat_op.h" #include "minddata/dataset/engine/datasetops/map_op/map_op.h" #include "minddata/dataset/engine/datasetops/project_op.h" #include "minddata/dataset/engine/datasetops/rename_op.h" @@ -127,6 +128,14 @@ std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptrValidateParams() ? ds : nullptr; } +// Function to overload "+" operator to concat two datasets +std::shared_ptr operator+(const std::shared_ptr &datasets1, + const std::shared_ptr &datasets2) { + std::shared_ptr ds = std::make_shared(std::vector({datasets1, datasets2})); + + return ds->ValidateParams() ? ds : nullptr; +} + // Function to create a VOCDataset. std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &mode, const std::map &class_index, bool decode, @@ -137,6 +146,14 @@ std::shared_ptr VOC(const std::string &dataset_dir, const std::strin return ds->ValidateParams() ? ds : nullptr; } +// Function to create a ZipDataset. +std::shared_ptr Zip(const std::vector> &datasets) { + auto ds = std::make_shared(datasets); + + // Call derived class validation method. + return ds->ValidateParams() ? 
ds : nullptr; +} + // FUNCTIONS TO CREATE DATASETS FOR DATASET OPS // (In alphabetical order) @@ -157,6 +174,14 @@ std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remai return ds; } +// Function to create a Concat dataset +std::shared_ptr Dataset::Concat(const std::vector> &datasets) { + auto ds = std::make_shared(datasets); + ds->children.push_back(shared_from_this()); + + return ds->ValidateParams() ? ds : nullptr; +} + // Function to create a Map dataset. std::shared_ptr Dataset::Map(std::vector> operations, std::vector input_columns, @@ -269,16 +294,10 @@ std::shared_ptr Dataset::Take(int32_t count) { // Function to create a Zip dataset std::shared_ptr Dataset::Zip(const std::vector> &datasets) { // Default values - auto ds = std::make_shared(); - - if (!ds->ValidateParams()) { - return nullptr; - } - for (auto dataset : datasets) { - ds->children.push_back(dataset); - } + auto ds = std::make_shared(datasets); + ds->children.push_back(shared_from_this()); - return ds; + return ds->ValidateParams() ? 
ds : nullptr; } // OTHER FUNCTIONS @@ -526,6 +545,27 @@ bool BatchDataset::ValidateParams() { return true; } +// Function to build ConcatOp +ConcatDataset::ConcatDataset(const std::vector> &datasets) : datasets_(datasets) { + this->children = datasets_; +} + +bool ConcatDataset::ValidateParams() { + if (datasets_.empty()) { + MS_LOG(ERROR) << "Concat: concatenated datasets are not specified."; + return false; + } + return true; +} + +std::vector> ConcatDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(connector_que_size_)); + return node_ops; +} + MapDataset::MapDataset(std::vector> operations, std::vector input_columns, std::vector output_columns, const std::vector &project_columns) : operations_(operations), @@ -698,9 +738,19 @@ bool TakeDataset::ValidateParams() { } // Function to build ZipOp -ZipDataset::ZipDataset() {} +ZipDataset::ZipDataset(const std::vector> &datasets) : datasets_(datasets) { + for (auto dataset : datasets_) { + this->children.push_back(dataset); + } +} -bool ZipDataset::ValidateParams() { return true; } +bool ZipDataset::ValidateParams() { + if (datasets_.empty()) { + MS_LOG(ERROR) << "Zip: dataset to zip are not specified."; + return false; + } + return true; +} std::vector> ZipDataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index 5af8c648e2..df56e71b96 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -48,6 +48,7 @@ class MnistDataset; class VOCDataset; // Dataset Op classes (in alphabetical order) class BatchDataset; +class ConcatDataset; class MapDataset; class ProjectDataset; class RenameDataset; @@ -98,6 +99,14 @@ std::shared_ptr ImageFolder(std::string dataset_dir, 
bool de /// \return Shared pointer to the current MnistDataset std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler = nullptr); +/// \brief Function to create a ConcatDataset +/// \notes Overload "+" operator to concat two datasets +/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated +/// \param[in] datasets2 Shared pointer to the second dataset to be concatenated +/// \return Shared pointer to the current ConcatDataset +std::shared_ptr operator+(const std::shared_ptr &datasets1, + const std::shared_ptr &datasets2); + /// \brief Function to create a VOCDataset /// \notes The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], @@ -116,6 +125,12 @@ std::shared_ptr VOC(const std::string &dataset_dir, const std::strin const std::map &class_index = {}, bool decode = false, std::shared_ptr sampler = nullptr); +/// \brief Function to create a ZipDataset +/// \notes Applies zip to the dataset +/// \param[in] datasets List of shared pointers to the datasets that we want to zip +/// \return Shared pointer to the current Dataset +std::shared_ptr Zip(const std::vector> &datasets); + /// \class Dataset datasets.h /// \brief A base class to represent a dataset in the data pipeline. 
class Dataset : public std::enable_shared_from_this { @@ -158,6 +173,12 @@ class Dataset : public std::enable_shared_from_this { /// \return Shared pointer to the current BatchDataset std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); + /// \brief Function to create a ConcatDataset + /// \notes Concat the datasets in the input + /// \param[in] datasets List of shared pointers to the dataset that should be concatenated together + /// \return Shared pointer to the current ConcatDataset + std::shared_ptr Concat(const std::vector> &datasets); + /// \brief Function to create a MapDataset /// \notes Applies each operation in operations to this dataset /// \param[in] operations Vector of operations to be applied on the dataset. Operations are @@ -220,7 +241,7 @@ class Dataset : public std::enable_shared_from_this { /// \brief Function to create a Zip Dataset /// \notes Applies zip to the dataset - /// \param[in] datasets A list of shared pointer to the datasets that we want to zip + /// \param[in] datasets A list of shared pointers to the datasets that we want to zip /// \return Shared pointer to the current Dataset std::shared_ptr Zip(const std::vector> &datasets); @@ -377,6 +398,26 @@ class BatchDataset : public Dataset { std::map>> pad_map_; }; +class ConcatDataset : public Dataset { + public: + /// \brief Constructor + explicit ConcatDataset(const std::vector> &datasets); + + /// \brief Destructor + ~ConcatDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return The list of shared pointers to the newly created DatasetOps + std::vector> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector> datasets_; +}; + class MapDataset : public Dataset { public: /// \brief Constructor @@ -521,7 +562,7 @@ class TakeDataset : public Dataset { class ZipDataset : 
public Dataset { public: /// \brief Constructor - ZipDataset(); + explicit ZipDataset(const std::vector> &datasets); /// \brief Destructor ~ZipDataset() = default; @@ -533,6 +574,9 @@ class ZipDataset : public Dataset { /// \brief Parameters validation /// \return bool true if all the params are valid bool ValidateParams() override; + + private: + std::vector> datasets_; }; } // namespace api diff --git a/tests/ut/cpp/dataset/c_api_test.cc b/tests/ut/cpp/dataset/c_api_test.cc index a8b7530ad7..f302854c08 100644 --- a/tests/ut/cpp/dataset/c_api_test.cc +++ b/tests/ut/cpp/dataset/c_api_test.cc @@ -53,6 +53,8 @@ class MindDataTestPipeline : public UT::DatasetOpTesting { TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatchAndRepeat."; + // Create a Mnist Dataset std::string folder_path = datasets_root_path_ + "/testMnistData/"; std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 10)); @@ -92,12 +94,16 @@ TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { } TEST_F(MindDataTestPipeline, TestMnistFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFail1."; + // Create a Mnist Dataset std::shared_ptr ds = Mnist("", RandomSampler(false, 10)); EXPECT_EQ(ds, nullptr); } TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTensorOpsAndMap."; + // Create a Mnist Dataset std::string folder_path = datasets_root_path_ + "/testMnistData/"; std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); @@ -148,6 +154,8 @@ TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { } TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestUniformAugWithOps."; + // Create a Mnist Dataset std::string folder_path = datasets_root_path_ + "/testMnistData/"; std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); @@ -199,6 +207,8 @@ TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { } 
TEST_F(MindDataTestPipeline, TestRandomFlip) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomFlip."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -249,6 +259,8 @@ TEST_F(MindDataTestPipeline, TestRandomFlip) { } TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -288,12 +300,16 @@ TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { } TEST_F(MindDataTestPipeline, TestImageFolderFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFail1."; + // Create an ImageFolder Dataset std::shared_ptr ds = ImageFolder("", true, nullptr); EXPECT_EQ(ds, nullptr); } TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithSamplers."; + std::shared_ptr sampl = DistributedSampler(2, 1); EXPECT_NE(sampl, nullptr); @@ -353,6 +369,8 @@ TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { } TEST_F(MindDataTestPipeline, TestPad) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPad."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -406,6 +424,8 @@ TEST_F(MindDataTestPipeline, TestPad) { } TEST_F(MindDataTestPipeline, TestCutOut) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCutOut."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -456,6 +476,8 @@ TEST_F(MindDataTestPipeline, TestCutOut) { } 
TEST_F(MindDataTestPipeline, TestNormalize) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNormalize."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -503,6 +525,8 @@ TEST_F(MindDataTestPipeline, TestNormalize) { } TEST_F(MindDataTestPipeline, TestDecode) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDecode."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); @@ -549,6 +573,8 @@ TEST_F(MindDataTestPipeline, TestDecode) { } TEST_F(MindDataTestPipeline, TestShuffleDataset) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestShuffleDataset."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -735,6 +761,8 @@ TEST_F(MindDataTestPipeline, TestTakeDatasetError1) { } TEST_F(MindDataTestPipeline, TestCifar10Dataset) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar10Dataset."; + // Create a Cifar10 Dataset std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; std::shared_ptr ds = Cifar10(folder_path, RandomSampler(false, 10)); @@ -767,6 +795,7 @@ TEST_F(MindDataTestPipeline, TestCifar10Dataset) { } TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar10DatasetFail1."; // Create a Cifar10 Dataset std::shared_ptr ds = Cifar10("", RandomSampler(false, 10)); @@ -774,6 +803,7 @@ TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) { } TEST_F(MindDataTestPipeline, TestCifar100Dataset) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100Dataset."; // Create a Cifar100 Dataset std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; @@ -808,6 +838,7 @@ 
TEST_F(MindDataTestPipeline, TestCifar100Dataset) { } TEST_F(MindDataTestPipeline, TestCifar100DatasetFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100DatasetFail1."; // Create a Cifar100 Dataset std::shared_ptr ds = Cifar100("", RandomSampler(false, 10)); @@ -815,6 +846,8 @@ TEST_F(MindDataTestPipeline, TestCifar100DatasetFail1) { } TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomColorAdjust."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -873,6 +906,8 @@ TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { } TEST_F(MindDataTestPipeline, TestRandomRotation) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomRotation."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -920,6 +955,8 @@ TEST_F(MindDataTestPipeline, TestRandomRotation) { } TEST_F(MindDataTestPipeline, TestProjectMap) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProjectMap."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -972,6 +1009,9 @@ TEST_F(MindDataTestPipeline, TestProjectMap) { } TEST_F(MindDataTestPipeline, TestZipSuccess) { + // Testing the member zip() function + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -1000,7 +1040,7 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) { EXPECT_NE(ds2, nullptr); // Create a Zip operation on the datasets - ds = ds->Zip({ds, ds1, ds2}); + ds = ds->Zip({ds1, ds2}); 
EXPECT_NE(ds, nullptr); // Create a Batch operation on ds @@ -1017,6 +1057,13 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) { std::unordered_map> row; iter->GetNextRow(&row); + // Check zipped column names + EXPECT_EQ(row.size(), 4); + EXPECT_NE(row.find("image"), row.end()); + EXPECT_NE(row.find("label"), row.end()); + EXPECT_NE(row.find("col1"), row.end()); + EXPECT_NE(row.find("col2"), row.end()); + uint64_t i = 0; while (row.size() != 0) { i++; @@ -1031,7 +1078,62 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) { iter->Stop(); } +TEST_F(MindDataTestPipeline, TestZipSuccess2) { + // Testing the static zip() function + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess2."; + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 9)); + EXPECT_NE(ds, nullptr); + std::shared_ptr ds2 = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds2, nullptr); + + // Create a Rename operation on ds (so that the 2 datasets we are going to zip have distinct column names) + ds = ds->Rename({"image", "label"}, {"col1", "col2"}); + EXPECT_NE(ds, nullptr); + + // Create a Zip operation on the datasets + ds = Zip({ds, ds2}); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + // Check zipped column names + EXPECT_EQ(row.size(), 4); + EXPECT_NE(row.find("image"), row.end()); + EXPECT_NE(row.find("label"), row.end()); + EXPECT_NE(row.find("col1"), row.end()); + EXPECT_NE(row.find("col2"), row.end()); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 9); + + // Manually terminate the pipeline + iter->Stop(); +} + TEST_F(MindDataTestPipeline, TestZipFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail."; // We expect this test to fail because we are the both datasets we are zipping have "image" and "label" columns // and zip doesn't accept datasets with same column names @@ -1045,7 +1147,7 @@ TEST_F(MindDataTestPipeline, TestZipFail) { EXPECT_NE(ds1, nullptr); // Create a Zip operation on the datasets - ds = ds->Zip({ds, ds1}); + ds = Zip({ds, ds1}); EXPECT_NE(ds, nullptr); // Create a Batch operation on ds @@ -1059,7 +1161,24 @@ TEST_F(MindDataTestPipeline, TestZipFail) { EXPECT_EQ(iter, nullptr); } +TEST_F(MindDataTestPipeline, TestZipFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail2."; + // This case is expected to fail because the input dataset is empty. 
+ + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Zip operation on the datasets + // Input dataset to zip is empty + ds = Zip({}); + EXPECT_EQ(ds, nullptr); +} + TEST_F(MindDataTestPipeline, TestRenameSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameSuccess."; + // Create an ImageFolder Dataset std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); @@ -1108,6 +1227,7 @@ TEST_F(MindDataTestPipeline, TestRenameSuccess) { } TEST_F(MindDataTestPipeline, TestRenameFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail."; // We expect this test to fail because input and output in Rename are not the same size // Create an ImageFolder Dataset @@ -1127,6 +1247,7 @@ TEST_F(MindDataTestPipeline, TestRenameFail) { TEST_F(MindDataTestPipeline, TestVOCSegmentation) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentation."; + // Create a VOC Dataset std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::shared_ptr ds = VOC(folder_path, "Segmentation", "train", {}, false, SequentialSampler(0, 3)); @@ -1176,6 +1297,7 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentation) { TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentationError1."; + // Create a VOC Dataset std::map class_index; class_index["car"] = 0; @@ -1188,6 +1310,7 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) { TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrMode) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCInvalidTaskOrMode."; + // Create a VOC Dataset std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::shared_ptr ds_1 = VOC(folder_path, "Classification", "train", {}, false, SequentialSampler(0, 
3)); @@ -1201,6 +1324,7 @@ TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrMode) { TEST_F(MindDataTestPipeline, TestVOCDetection) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCDetection."; + // Create a VOC Dataset std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::shared_ptr ds = VOC(folder_path, "Detection", "train", {}, false, SequentialSampler(0, 4)); @@ -1246,6 +1370,7 @@ TEST_F(MindDataTestPipeline, TestVOCDetection) { TEST_F(MindDataTestPipeline, TestVOCClassIndex) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCClassIndex."; + // Create a VOC Dataset std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::map class_index; @@ -1289,3 +1414,153 @@ TEST_F(MindDataTestPipeline, TestVOCClassIndex) { // Manually terminate the pipeline iter->Stop(); } + +TEST_F(MindDataTestPipeline, TestConcatSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess."; + + // Create an ImageFolder Dataset + // Column names: {"image", "label"} + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Cifar10 Dataset + // Column names: {"image", "label"} + folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds2 = Cifar10(folder_path, RandomSampler(false, 9)); + EXPECT_NE(ds2, nullptr); + + // Create a Project operation on ds + ds = ds->Project({"image"}); + EXPECT_NE(ds, nullptr); + ds2 = ds2->Project({"image"}); + EXPECT_NE(ds, nullptr); + + // Create a Concat operation on the ds + ds = ds->Concat({ds2}); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 19); + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestConcatSuccess2) { + // Test "+" operator to concat two datasets + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess2."; + + // Create an ImageFolder Dataset + // Column names: {"image", "label"} + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Cifar10 Dataset + // Column names: {"image", "label"} + folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds2 = Cifar10(folder_path, RandomSampler(false, 9)); + EXPECT_NE(ds2, nullptr); + + // Create a Project operation on ds + ds = ds->Project({"image"}); + EXPECT_NE(ds, nullptr); + ds2 = ds2->Project({"image"}); + EXPECT_NE(ds, nullptr); + + // Create a Concat operation on the ds + ds = ds + ds2; + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 19); + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestConcatFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail1."; + // This case is expected to fail because the input column names of concatenated datasets are not the same + + // Create an ImageFolder Dataset + // Column names: {"image", "label"} + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + std::shared_ptr ds2 = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Rename operation on ds + ds2 = ds2->Rename({"image", "label"}, {"col1", "col2"}); + EXPECT_NE(ds, nullptr); + + // Create a Concat operation on the ds + // Column names of the datasets to concat do not match + ds = ds->Concat({ds2}); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Concat operation on the ds + // Input dataset to concat is empty + ds = ds->Concat({}); + EXPECT_EQ(ds, nullptr); +}