From 03b32e4eabc5128110ea9d673ba2ffc4bdd9cc2f Mon Sep 17 00:00:00 2001
From: liucunwei
Date: Fri, 25 Mar 2022 10:25:19 +0800
Subject: [PATCH] Add some ut pipeline test case for C++ dataset

---
 .../dataset/c_api_dataset_caltech256_test.cc  | 58 +++++++++++++++
 .../ut/cpp/dataset/c_api_dataset_imdb_test.cc | 60 +++++++++++++++
 .../cpp/dataset/c_api_dataset_semeion_test.cc | 58 +++++++++++++++
 .../dataset/c_api_dataset_wiki_text_test.cc   | 74 +++++++++++++++++++
 4 files changed, 250 insertions(+)

diff --git a/tests/ut/cpp/dataset/c_api_dataset_caltech256_test.cc b/tests/ut/cpp/dataset/c_api_dataset_caltech256_test.cc
index 6b26f14372..8c466df98e 100755
--- a/tests/ut/cpp/dataset/c_api_dataset_caltech256_test.cc
+++ b/tests/ut/cpp/dataset/c_api_dataset_caltech256_test.cc
@@ -120,6 +120,64 @@ TEST_F(MindDataTestPipeline, TestCaltech256DatasetWithPipeline) {
   iter->Stop();
 }
 
+/// Feature: Caltech256IteratorOneColumn.
+/// Description: iterate a batched (batch size 1) Caltech256Dataset through an iterator that selects only "image".
+/// Expectation: 44 rows are read and every tensor has shape {1, 159109}.
+TEST_F(MindDataTestPipeline, TestCaltech256IteratorOneColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorOneColumn.";
+  // Create a Caltech256 Dataset.
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Create a Batch operation on ds
+  int32_t batch_size = 1;
+  ds = ds->Batch(batch_size);
+  ASSERT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Only select "image" column and drop others
+  std::vector<std::string> columns = {"image"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
+  ASSERT_NE(iter, nullptr);  // fatal: iter is dereferenced below
+
+  // Iterate the dataset and get each row
+  std::vector<mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+  std::vector<int64_t> expect_image = {1, 159109};
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    for (auto &v : row) {
+      MS_LOG(INFO) << "image shape:" << v.Shape();
+      EXPECT_EQ(expect_image, v.Shape());
+    }
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+
+  EXPECT_EQ(i, 44);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+/// Feature: Caltech256IteratorWrongColumn.
+/// Description: create an iterator of Caltech256Dataset with the wrong column name "digital".
+/// Expectation: CreateIterator returns nullptr.
+TEST_F(MindDataTestPipeline, TestCaltech256IteratorWrongColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorWrongColumn.";
+  // Create a Caltech256 Dataset.
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Pass wrong column name
+  std::vector<std::string> columns = {"digital"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_EQ(iter, nullptr);
+}
+
 /// Feature: Caltech256Dataset
 /// Description: test getting size of Caltech256Dataset
 /// Expectation: the size is correct
diff --git a/tests/ut/cpp/dataset/c_api_dataset_imdb_test.cc b/tests/ut/cpp/dataset/c_api_dataset_imdb_test.cc
index dc01b9daf3..bb7a180679 100644
--- a/tests/ut/cpp/dataset/c_api_dataset_imdb_test.cc
+++ b/tests/ut/cpp/dataset/c_api_dataset_imdb_test.cc
@@ -189,6 +189,66 @@ TEST_F(MindDataTestPipeline, TestIMDBBasicWithPipeline) {
   iter->Stop();
 }
 
+/// Feature: IMDBIteratorOneColumn.
+/// Description: iterate a batched (batch size 1) IMDBDataset through an iterator that selects only "text".
+/// Expectation: 8 rows are read.
+TEST_F(MindDataTestPipeline, TestIMDBIteratorOneColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorOneColumn.";
+  std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
+  std::string usage = "all";  // 'train', 'test', 'all'
+
+  // Create an IMDB Dataset
+  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Create a Batch operation on ds
+  int32_t batch_size = 1;
+  ds = ds->Batch(batch_size);
+  ASSERT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Only select "text" column and drop others
+  std::vector<std::string> columns = {"text"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
+  ASSERT_NE(iter, nullptr);  // fatal: iter is dereferenced below
+
+  // Iterate the dataset and get each row
+  std::vector<mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    for (auto &v : row) {
+      MS_LOG(INFO) << "text shape:" << v.Shape();
+    }
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+
+  EXPECT_EQ(i, 8);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+/// Feature: IMDBIteratorWrongColumn.
+/// Description: create an iterator of IMDBDataset with the wrong column name "digital".
+/// Expectation: CreateIterator returns nullptr.
+TEST_F(MindDataTestPipeline, TestIMDBIteratorWrongColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorWrongColumn.";
+  std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
+  std::string usage = "all";  // 'train', 'test', 'all'
+
+  // Create an IMDB Dataset
+  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Pass wrong column name
+  std::vector<std::string> columns = {"digital"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_EQ(iter, nullptr);
+}
+
 /// Feature: Test IMDB Dataset.
 /// Description: read IMDB data with GetDatasetSize, GetColumnNames, GetBatchSize.
 /// Expectation: the data is processed successfully.
diff --git a/tests/ut/cpp/dataset/c_api_dataset_semeion_test.cc b/tests/ut/cpp/dataset/c_api_dataset_semeion_test.cc
index 9e9cde3d9b..3e9576ded2 100644
--- a/tests/ut/cpp/dataset/c_api_dataset_semeion_test.cc
+++ b/tests/ut/cpp/dataset/c_api_dataset_semeion_test.cc
@@ -119,6 +119,64 @@ TEST_F(MindDataTestPipeline, TestSemeionDatasetWithPipeline) {
   iter->Stop();
 }
 
+/// Feature: SemeionIteratorOneColumn.
+/// Description: iterate a batched (batch size 1) SemeionDataset through an iterator that selects only "image".
+/// Expectation: 5 rows are read and every tensor has shape {1, 16, 16}.
+TEST_F(MindDataTestPipeline, TestSemeionIteratorOneColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorOneColumn.";
+  // Create a Semeion Dataset.
+  std::string folder_path = datasets_root_path_ + "/testSemeionData";
+  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Create a Batch operation on ds
+  int32_t batch_size = 1;
+  ds = ds->Batch(batch_size);
+  ASSERT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Only select "image" column and drop others
+  std::vector<std::string> columns = {"image"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
+  ASSERT_NE(iter, nullptr);  // fatal: iter is dereferenced below
+
+  // Iterate the dataset and get each row
+  std::vector<mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+  std::vector<int64_t> expect_image = {1, 16, 16};
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    for (auto &v : row) {
+      MS_LOG(INFO) << "image shape:" << v.Shape();
+      EXPECT_EQ(expect_image, v.Shape());
+    }
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+
+  EXPECT_EQ(i, 5);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+/// Feature: SemeionIteratorWrongColumn.
+/// Description: create an iterator of SemeionDataset with the wrong column name "digital".
+/// Expectation: CreateIterator returns nullptr.
+TEST_F(MindDataTestPipeline, TestSemeionIteratorWrongColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorWrongColumn.";
+  // Create a Semeion Dataset.
+  std::string folder_path = datasets_root_path_ + "/testSemeionData";
+  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Pass wrong column name
+  std::vector<std::string> columns = {"digital"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_EQ(iter, nullptr);
+}
+
 /// Feature: SemeionDataset.
 /// Description: read number of all samples from all files according to different versions.
 /// Expectation: 10.
diff --git a/tests/ut/cpp/dataset/c_api_dataset_wiki_text_test.cc b/tests/ut/cpp/dataset/c_api_dataset_wiki_text_test.cc
index 124bebf6ad..a568f3e545 100644
--- a/tests/ut/cpp/dataset/c_api_dataset_wiki_text_test.cc
+++ b/tests/ut/cpp/dataset/c_api_dataset_wiki_text_test.cc
@@ -146,6 +146,88 @@ TEST_F(MindDataTestPipeline, TestWikiTextDatasetBasicWithPipeline) {
   GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
 }
 
+/// Feature: WikiTextIteratorOneColumn.
+/// Description: iterate a batched (batch size 1) WikiTextDataset through an iterator that selects only "text".
+/// Expectation: 3 rows are read and every tensor has shape {1}; the global configuration is restored afterwards.
+TEST_F(MindDataTestPipeline, TestWikiTextIteratorOneColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorOneColumn.";
+  // Test WikiText Dataset with single text file and many default inputs
+
+  // Set configuration (the original values are saved so they can be restored at the end of the test)
+  uint32_t original_seed = GlobalContext::config_manager()->seed();
+  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
+  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
+  GlobalContext::config_manager()->set_seed(987);
+  GlobalContext::config_manager()->set_num_parallel_workers(4);
+
+  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
+  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Create a Batch operation on ds
+  int32_t batch_size = 1;
+  ds = ds->Batch(batch_size);
+  ASSERT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Only select "text" column and drop others
+  std::vector<std::string> columns = {"text"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
+  ASSERT_NE(iter, nullptr);  // fatal: iter is dereferenced below
+
+  // Iterate the dataset and get each row
+  std::vector<mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+  std::vector<int64_t> expect_text = {1};
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    for (auto &v : row) {
+      MS_LOG(INFO) << "text shape:" << v.Shape();
+      EXPECT_EQ(expect_text, v.Shape());
+    }
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+
+  EXPECT_EQ(i, 3);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+
+  // Restore configuration so the seed / worker count set above do not leak into other tests
+  GlobalContext::config_manager()->set_seed(original_seed);
+  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
+}
+
+/// Feature: WikiTextIteratorWrongColumn.
+/// Description: create an iterator of WikiTextDataset with the wrong column name "digital".
+/// Expectation: CreateIterator returns nullptr; the global configuration is restored afterwards.
+TEST_F(MindDataTestPipeline, TestWikiTextIteratorWrongColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorWrongColumn.";
+  // Test WikiText Dataset with single text file and many default inputs
+
+  // Set configuration (the original values are saved so they can be restored at the end of the test)
+  uint32_t original_seed = GlobalContext::config_manager()->seed();
+  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
+  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
+  GlobalContext::config_manager()->set_seed(987);
+  GlobalContext::config_manager()->set_num_parallel_workers(4);
+
+  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
+  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
+  ASSERT_NE(ds, nullptr);  // fatal: ds is dereferenced below
+
+  // Pass wrong column name
+  std::vector<std::string> columns = {"digital"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_EQ(iter, nullptr);
+
+  // Restore configuration so the seed / worker count set above do not leak into other tests
+  GlobalContext::config_manager()->set_seed(original_seed);
+  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
+}
+
 /// Feature: Test WikiText Dataset.
 /// Description: read WikiText data and get data.
 /// Expectation: the data is processed successfully.