| @@ -82,6 +82,70 @@ TEST_F(MindDataTestPipeline, TestTextFileDatasetBasic) { | |||
| GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestTextFileDatasetBasicWithPipeline) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileDatasetBasicWithPipeline."; | |||
| // Test TextFile Dataset with single text file and many default inputs | |||
| // Set configuration | |||
| uint32_t original_seed = GlobalContext::config_manager()->seed(); | |||
| uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers(); | |||
| MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers; | |||
| GlobalContext::config_manager()->set_seed(987); | |||
| GlobalContext::config_manager()->set_num_parallel_workers(4); | |||
| // Create two TextFile Dataset, with single text file | |||
| // Note: 1.txt has 3 rows | |||
| // Use 2 samples | |||
| // Use defaults for other input parameters | |||
| std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt"; | |||
| std::shared_ptr<Dataset> ds1 = TextFile({tf_file1}, 2); | |||
| std::shared_ptr<Dataset> ds2 = TextFile({tf_file1}, 2); | |||
| EXPECT_NE(ds1, nullptr); | |||
| EXPECT_NE(ds2, nullptr); | |||
| // Create two Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds1 = ds1->Repeat(repeat_num); | |||
| EXPECT_NE(ds1, nullptr); | |||
| repeat_num = 3; | |||
| ds2 = ds2->Repeat(repeat_num); | |||
| EXPECT_NE(ds2, nullptr); | |||
| // Create a Concat operation on the ds | |||
| ds1 = ds1->Concat({ds2}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| // Create an iterator over the result of the above dataset. | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds1->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| EXPECT_NE(row.find("text"), row.end()); | |||
| std::vector<std::string> expected_result = {"Be happy every day.", "This is a text file."}; | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| auto text = row["text"]; | |||
| MS_LOG(INFO) << "Tensor text shape: " << text->shape(); | |||
| i++; | |||
| iter->GetNextRow(&row); | |||
| } | |||
| // Expect 10 samples | |||
| EXPECT_EQ(i, 10); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| // Restore configuration | |||
| GlobalContext::config_manager()->set_seed(original_seed); | |||
| GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestTextFileGetters) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileGetters."; | |||
| // Test TextFile Dataset with single text file and many default inputs | |||