| @@ -3,7 +3,6 @@ include(GoogleTest) | |||
| SET(DE_UT_SRCS | |||
| affine_op_test.cc | |||
| execute_test.cc | |||
| album_op_test.cc | |||
| arena_test.cc | |||
| auto_contrast_op_test.cc | |||
| batch_op_test.cc | |||
| @@ -60,18 +59,13 @@ SET(DE_UT_SRCS | |||
| c_api_vision_soft_dvpp_test.cc | |||
| c_api_vision_uniform_aug_test.cc | |||
| c_api_vision_vertical_flip_test.cc | |||
| celeba_op_test.cc | |||
| center_crop_op_test.cc | |||
| channel_swap_test.cc | |||
| cifar_op_test.cc | |||
| circular_pool_test.cc | |||
| client_config_test.cc | |||
| clue_op_test.cc | |||
| coco_op_test.cc | |||
| common/bboxop_common.cc | |||
| common/common.cc | |||
| common/cvop_common.cc | |||
| concat_op_test.cc | |||
| concatenate_op_test.cc | |||
| connector_test.cc | |||
| csv_op_test.cc | |||
| @@ -88,7 +82,6 @@ SET(DE_UT_SRCS | |||
| c_api_vision_gaussian_blur_test.cc | |||
| global_context_test.cc | |||
| gnn_graph_test.cc | |||
| image_folder_op_test.cc | |||
| image_process_test.cc | |||
| interrupt_test.cc | |||
| ir_callback_test.cc | |||
| @@ -104,7 +97,6 @@ SET(DE_UT_SRCS | |||
| memory_pool_test.cc | |||
| mind_record_op_test.cc | |||
| mixup_batch_op_test.cc | |||
| mnist_op_test.cc | |||
| normalize_op_test.cc | |||
| one_hot_op_test.cc | |||
| optimization_pass_test.cc | |||
| @@ -112,7 +104,6 @@ SET(DE_UT_SRCS | |||
| pad_op_test.cc | |||
| path_test.cc | |||
| perf_data_test.cc | |||
| project_op_test.cc | |||
| queue_test.cc | |||
| random_affine_op_test.cc | |||
| random_color_adjust_op_test.cc | |||
| @@ -131,16 +122,12 @@ SET(DE_UT_SRCS | |||
| random_solarize_op_test.cc | |||
| random_vertical_flip_op_test.cc | |||
| random_vertical_flip_with_bbox_op_test.cc | |||
| rename_op_test.cc | |||
| rescale_op_test.cc | |||
| resize_op_test.cc | |||
| resize_with_bbox_op_test.cc | |||
| rgba_to_bgr_op_test.cc | |||
| rgba_to_rgb_op_test.cc | |||
| schema_test.cc | |||
| sentence_piece_vocab_op_test.cc | |||
| shuffle_op_test.cc | |||
| skip_op_test.cc | |||
| slice_op_test.cc | |||
| sliding_window_op_test.cc | |||
| solarize_op_test.cc | |||
| @@ -150,13 +137,11 @@ SET(DE_UT_SRCS | |||
| subset_random_sampler_test.cc | |||
| subset_sampler_test.cc | |||
| swap_red_blue_test.cc | |||
| take_op_test.cc | |||
| task_manager_test.cc | |||
| tensor_row_test.cc | |||
| tensor_string_test.cc | |||
| tensor_test.cc | |||
| tensorshape_test.cc | |||
| text_file_op_test.cc | |||
| tfReader_op_test.cc | |||
| to_float16_op_test.cc | |||
| tokenizer_op_test.cc | |||
| @@ -165,16 +150,12 @@ SET(DE_UT_SRCS | |||
| trucate_pair_test.cc | |||
| type_cast_op_test.cc | |||
| weighted_random_sampler_test.cc | |||
| zip_op_test.cc | |||
| ) | |||
| if(ENABLE_PYTHON) | |||
| set(DE_UT_SRCS | |||
| ${DE_UT_SRCS} | |||
| filter_op_test.cc | |||
| manifest_op_test.cc | |||
| voc_op_test.cc | |||
| sentence_piece_vocab_op_test.cc | |||
| ) | |||
| endif() | |||
| @@ -779,21 +779,20 @@ TEST_F(MindDataTestPipeline, TestFilterFail3) { | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat."; | |||
| void ImageFolderBatchAndRepeat(int32_t repeat_count, int32_t batch_size, int64_t num_samples, | |||
| bool replacement, std::string datasets_root_path) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10)); | |||
| std::string folder_path = datasets_root_path + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, | |||
| std::make_shared<RandomSampler>(replacement, num_samples)); | |||
| uint64_t ds_size = 44; | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| ds = ds->Repeat(repeat_count); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 2; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_NE(ds, nullptr); | |||
| @@ -814,12 +813,35 @@ TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 10); | |||
| uint64_t expect = 0; | |||
| if (batch_size != 0) { | |||
| if (num_samples == 0) { | |||
| expect = ds_size * repeat_count / batch_size; | |||
| } else { | |||
| expect = num_samples * repeat_count / batch_size; | |||
| } | |||
| } else { | |||
| expect = 0; | |||
| } | |||
| EXPECT_EQ(i, expect); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test ImageFolder with Batch and Repeat operations | |||
| // Description: Perform Repeat and Batch ops with varying parameters, | |||
| // iterate through dataset and count rows | |||
| // Expectation: Number of rows should be equal to the size of the dataset/num_samples | |||
| // times the repeat_count divided by the batch_size | |||
| TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat."; | |||
| ImageFolderBatchAndRepeat(2, 2, 10, false, datasets_root_path_); | |||
| ImageFolderBatchAndRepeat(2, 11, 0, false, datasets_root_path_); | |||
| ImageFolderBatchAndRepeat(3, 2, 12, true, datasets_root_path_); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestPipelineGetDatasetSize) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPipelineGetDatasetSize."; | |||
| @@ -1997,3 +2019,202 @@ TEST_F(MindDataTestPipeline, TestConcatTFRecord) { | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test ImageFolder with Sequential Sampler and Decode | |||
| // Description: Create ImageFolder dataset with decode=true, iterate through dataset and count rows | |||
| // Expectation: There should be 20 rows in the dataset (# of samples taken) | |||
| TEST_F(MindDataTestPipeline, TestImageFolderDecode) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderDecode."; | |||
| std::shared_ptr<Sampler> sampler = std::make_shared<SequentialSampler>(0 , 20); | |||
| EXPECT_NE(sampler, nullptr); | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, sampler); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 20); | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test TFRecord with Take operation | |||
| // Description: Perform Take operation with count = 5, iterate through dataset and count rows | |||
| // Expectation: There should be 5 rows in the dataset | |||
| TEST_F(MindDataTestPipeline, TestTFRecordTake) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordTake."; | |||
| // Create a TFRecord Dataset | |||
| std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; | |||
| std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json"; | |||
| std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Take operation on ds | |||
| ds = ds->Take(5); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // iterate over the dataset and get each row | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| MS_LOG(INFO) << "Number of rows: " << i; | |||
| // Expect 5 rows | |||
| EXPECT_EQ(i, 5); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test Skip operation on TFRecord dataset | |||
| // Description: Perform skip operation with count = 5, iterate through dataset and count rows | |||
| // Expectation: There should be 7 rows, (12 rows initially and 5 are skipped) | |||
| TEST_F(MindDataTestPipeline, TestTFRecordSkip) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordSkip."; | |||
| // Create a TFRecord Dataset | |||
| std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; | |||
| std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json"; | |||
| std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Skip operation on ds | |||
| ds = ds->Skip(5); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // iterate over the dataset and get each row | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| MS_LOG(INFO) << "Number of rows: " << i; | |||
| // Expect 7 rows | |||
| EXPECT_EQ(i, 7); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test Rename operation on TFRecord | |||
| // Description: Rename columns in dataset, iterate through dataset and count rows | |||
| // Expectation: The columns should have a new name after the Rename op and there should be 3 rows in the dataset | |||
| TEST_F(MindDataTestPipeline, TestTFRecordRename) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordRename."; | |||
| // Create a TFRecord Dataset | |||
| std::string file_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; | |||
| std::shared_ptr<Dataset> ds = TFRecord({file_path}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Rename operation on ds | |||
| ds = ds->Rename({"label"}, {"label1"}); | |||
| ds = ds->Rename({"label1", "image"}, {"label2", "image1"}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // iterate over the dataset and get each row | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| EXPECT_NE(row.find("label2"), row.end()); | |||
| EXPECT_NE(row.find("image1"), row.end()); | |||
| EXPECT_EQ(row.find("image"), row.end()); | |||
| EXPECT_EQ(row.find("label"), row.end()); | |||
| EXPECT_EQ(row.find("label1"), row.end()); | |||
| while (row.size() != 0) { | |||
| i++; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 3); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test TFRecord with Zip and Repeat operation | |||
| // Description: Create two datasets and apply Zip operation on them. | |||
| // Apply Repeat operation on resulting dataset and count rows | |||
| // Expectation: There should be 9 rows in the dataset | |||
| TEST_F(MindDataTestPipeline, TestTFRecordZip) { | |||
| // Testing the member zip() function | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordZip."; | |||
| // Create a TFRecord Dataset | |||
| std::string file_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; | |||
| std::shared_ptr<Dataset> ds = TFRecord({file_path}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a TFRecord Dataset | |||
| std::string file_path1 = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| std::shared_ptr<Dataset> ds1 = TFRecord({file_path1}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| // Create a Zip operation on the datasets | |||
| ds = ds->Zip({ds1}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 3; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // iterate over the dataset and get each row | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 9); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| @@ -283,6 +283,21 @@ TEST_F(MindDataTestPipeline, TestTextFileDatasetFail7) { | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| // Feature: Test Textfile dataset | |||
| // Description: Create TextFile dataset with a file that does not exist and check the size of the dataset | |||
| // Expectation: The dataset should have size 0 | |||
| TEST_F(MindDataTestPipeline, TestTextFileFileNotExist) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileFileNotExist."; | |||
| // Create a TextFile Dataset | |||
| // with non-existent dataset_files input | |||
| std::string tf_file1 = datasets_root_path_ + "/does/not/exist/0.txt"; | |||
| std::shared_ptr<Dataset> ds = TextFile({tf_file1}); | |||
| EXPECT_NE(ds, nullptr); | |||
| EXPECT_EQ(ds->GetDatasetSize(), 0); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestTextFileDatasetShuffleFalse1A) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileDatasetShuffleFalse1A."; | |||
| // Test TextFile Dataset with two text files and no shuffle, num_parallel_workers=1 | |||
| @@ -166,6 +166,32 @@ TEST_F(MindDataTestPipeline, TestVOCDetection) { | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test VOC dataset with detection task | |||
| // Description: Create VOC dataset with task="Detection" and count rows | |||
| // Expectation: There should be 9 rows | |||
| TEST_F(MindDataTestPipeline, TestVOCDetection1) { | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testVOC2012"; | |||
| std::shared_ptr<Dataset> ds = | |||
| VOC(dataset_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 0)); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| int row_count = 0; | |||
| while (row.size() != 0) { | |||
| auto image = row["image"]; | |||
| auto label = row["label"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); | |||
| MS_LOG(INFO) << "Tensor label shape: " << label.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 9); | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrModeError1) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCInvalidTaskOrModeError1."; | |||
| @@ -235,6 +261,32 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentation) { | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test VOC dataset with Segmentation task | |||
| // Description: Create VOC dataset with task="Segmentation" and count rows | |||
| // Expectation: There should be 10 rows | |||
| TEST_F(MindDataTestPipeline, TestVOCSegmentation1) { | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testVOC2012"; | |||
| std::shared_ptr<Dataset> ds = | |||
| VOC(dataset_path, "Segmentation", "train", {}, false, std::make_shared<SequentialSampler>(0, 0)); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| int row_count = 0; | |||
| while (!row.empty()) { | |||
| auto image = row["image"]; | |||
| auto target = row["target"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); | |||
| MS_LOG(INFO) << "Tensor target shape: " << target.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 10); | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVOCSegmentationError2) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentationError2."; | |||
| @@ -88,6 +88,43 @@ TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test ImageFolder with WeightedRandomSampler | |||
| // Description: Create ImageFolder dataset with WeightedRandomSampler given num_samples=12, | |||
| // iterate through dataset and count rows | |||
| // Expectation: There should be 12 rows | |||
| TEST_F(MindDataTestPipeline, TestWeightedRandomSamplerImageFolder) { | |||
| std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; | |||
| std::shared_ptr<Sampler> sampl = std::make_shared<WeightedRandomSampler>(weights, 12); | |||
| EXPECT_NE(sampl, nullptr); | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 12); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1."; | |||
| // Test building a dataset with no sampler provided (defaults to random sampler | |||
| @@ -234,6 +271,74 @@ TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) { | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test ImageFolder with DistributedSampler | |||
| // Description: Create ImageFolder dataset with DistributedSampler given num_shards=11 and shard_id=10, | |||
| // count rows in dataset | |||
| // Expectation: There should be 4 rows (44 rows in original data/11 = 4) | |||
| TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess5) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess5."; | |||
| // Test basic setting of distributed_sampler | |||
| // num_shards=11, shard_id=10, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true | |||
| std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(11, 10, false, 0, 0, -1, true); | |||
| EXPECT_NE(sampler, nullptr); | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto label = row["label"]; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 4); | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test ImageFolder with DistributedSampler | |||
| // Description: Create ImageFolder dataset with DistributedSampler given num_shards=4 and shard_id=3, | |||
| // count rows in dataset | |||
| // Expectation: There should be 11 rows (44 rows in original data/4 = 11) | |||
| TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess6) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess6."; | |||
| // Test basic setting of distributed_sampler | |||
| // num_shards=4, shard_id=3, shuffle=false, num_samplers=12, seed=0, offset=-1, even_dist=true | |||
| std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 3, false, 12, 0, -1, true); | |||
| EXPECT_NE(sampler, nullptr); | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto label = row["label"]; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 11); | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1."; | |||
| // Test basic setting of distributed_sampler | |||
| @@ -441,3 +546,34 @@ TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) { | |||
| iter->Stop(); | |||
| } | |||
| // Feature: Test ImageFolder with PKSampler | |||
| // Description: Create ImageFolder dataset with PKSampler given num_val=3 and count rows | |||
| // Expectation: There should be 12 rows | |||
| TEST_F(MindDataTestPipeline, TestPKSamplerImageFolder) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPKSamplerImageFolder."; | |||
| std::shared_ptr<Sampler> sampler = std::make_shared<PKSampler>(3, false); | |||
| EXPECT_NE(sampler, nullptr); | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 12); | |||
| iter->Stop(); | |||
| } | |||
| @@ -67,17 +67,6 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) { | |||
| my_tree->AssignRoot(root_op); | |||
| root_op->AddChild(parent_op); | |||
| ASSERT_NE(root_op, nullptr); | |||
| // Testing Iterator | |||
| MS_LOG(INFO) << "Testing Tree Iterator from root."; | |||
| for (auto itr = my_tree->begin(); itr != my_tree->end(); ++itr) { | |||
| itr->Print(std::cout, false); | |||
| } | |||
| MS_LOG(INFO) << "Finished testing Tree Iterator from root."; | |||
| MS_LOG(INFO) << "Testing Tree Iterator from parentOp."; | |||
| for (auto itr = my_tree->begin(parent_op); itr != my_tree->end(); ++itr) { | |||
| itr->Print(std::cout, false); | |||
| } | |||
| MS_LOG(INFO) << "Finished testing Tree Iterator from parentOp."; | |||
| // At this point, since move semantic was used, | |||
| // I don't have any operator access myself now. | |||
| @@ -120,22 +109,4 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree2) { | |||
| MS_LOG(INFO) << "Launching my tree."; | |||
| rc = my_tree->Launch(); | |||
| ASSERT_OK(rc); | |||
| // Simulate a parse of data from our pipeline. | |||
| std::shared_ptr<DatasetOp> root_node = my_tree->root(); | |||
| // Start the loop of reading from our pipeline using iterator | |||
| MS_LOG(INFO) << "Testing DatasetIterator in testTree2."; | |||
| DatasetIterator di(my_tree); | |||
| TensorRow buffer; | |||
| rc = di.FetchNextTensorRow(&buffer); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| while (!buffer.empty()) { | |||
| rc = di.FetchNextTensorRow(&buffer); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| } | |||
| } | |||
| // Construct some tree nodes and play with them | |||
| TEST_F(MindDataTestExecutionTree, TestExecutionTree3) { MS_LOG(INFO) << "Doing MindDataTestExecutionTree3."; } | |||
| @@ -1,53 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/util/circular_pool.h" | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "common/common.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| using namespace mindspore::dataset; | |||
| namespace de = mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestfilter_op : public UT::DatasetOpTesting {}; | |||
| std::shared_ptr<de::FilterOp> Filter() { | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| int32_t op_connector_size = config_manager->op_connector_size(); | |||
| int32_t num_workers = config_manager->num_parallel_workers(); | |||
| std::shared_ptr<TensorOp> predicate_func; | |||
| std::vector<std::string> in_col_names = {}; | |||
| std::shared_ptr<de::FilterOp> op = | |||
| std::make_shared<FilterOp>(in_col_names, num_workers, op_connector_size, predicate_func); | |||
| return op; | |||
| } | |||
| TEST_F(MindDataTestfilter_op, Testfilter_opFuntions) { | |||
| MS_LOG(INFO) << "Doing MindDataTest filter_op."; | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::shared_ptr<DatasetOp> parent_op = Filter(); | |||
| std::shared_ptr<DatasetOp> leaf_op = Filter(); | |||
| my_tree->AssociateNode(parent_op); | |||
| my_tree->AssociateNode(leaf_op); | |||
| ASSERT_NE(parent_op, nullptr); | |||
| ASSERT_NE(leaf_op, nullptr); | |||
| } | |||
| @@ -1,456 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/core/global_context.h" | |||
| #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "securec.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| // std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); | |||
| // std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, | |||
| bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr, | |||
| std::map<std::string, int32_t> map = {}, bool decode = false) { | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| TensorShape scalar = TensorShape::CreateScalar(); | |||
| (void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); | |||
| (void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)); | |||
| std::set<std::string> ext = {".jpg", ".JPEG"}; | |||
| if (sampler == nullptr) { | |||
| int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data | |||
| int64_t start_index = 0; | |||
| sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); | |||
| } | |||
| std::shared_ptr<ImageFolderOp> so = | |||
| std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler); | |||
| return so; | |||
| } | |||
| Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr, | |||
| DataType::Type data_type = DataType::DE_UINT32) { | |||
| TensorShape shape(std::vector<int64_t>(1, num_elements)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(data_type), data, sample_ids)); | |||
| return Status::OK(); | |||
| } | |||
| class MindDataTestImageFolderSampler : public UT::DatasetOpTesting { | |||
| protected: | |||
| }; | |||
| TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeat) { | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| auto op1 = ImageFolder(16, 2, 32, folder_path, false); | |||
| auto op2 = Repeat(2); | |||
| op1->SetTotalRepeats(2); | |||
| op1->SetNumRepeatsPerEpoch(2); | |||
| auto tree = Build({op1, op2}); | |||
| tree->Prepare(); | |||
| int32_t res[] = {0, 1, 2, 3}; | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_TRUE(res[(i % 44) / 11] == label); | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 88); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestRandomImageFolder) { | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, true, nullptr)}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 44); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) { | |||
| int32_t original_seed = GlobalContext::config_manager()->seed(); | |||
| GlobalContext::config_manager()->set_seed(0); | |||
| int64_t num_samples = 12; | |||
| std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(true, num_samples, true); | |||
| int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1}; // ground truth label | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_TRUE(res[i] == label); | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 12); | |||
| } | |||
| GlobalContext::config_manager()->set_seed(original_seed); | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeatBatch) { | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| auto op1 = ImageFolder(16, 2, 32, folder_path, false); | |||
| auto op2 = Repeat(2); | |||
| auto op3 = Batch(11); | |||
| op1->SetTotalRepeats(2); | |||
| op1->SetNumRepeatsPerEpoch(2); | |||
| auto tree = Build({op1, op2, op3}); | |||
| tree->Prepare(); | |||
| int32_t res[4][11] = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, | |||
| {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, | |||
| {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, | |||
| {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}}; | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| while (tensor_map.size() != 0) { | |||
| std::shared_ptr<Tensor> label; | |||
| Create1DTensor(&label, 11, reinterpret_cast<unsigned char *>(res[i % 4]), DataType::DE_INT32); | |||
| EXPECT_TRUE((*label) == (*tensor_map["label"])); | |||
| MS_LOG(DEBUG) << "row: " << i << " " << tensor_map["image"]->shape() << " (*label):" << (*label) | |||
| << " *tensor_map[label]: " << *tensor_map["label"] << std::endl; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 8); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) { | |||
| // id range 0 - 10 is label 0, and id range 11 - 21 is label 1 | |||
| std::vector<int64_t> indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11}); | |||
| int64_t num_samples = 0; | |||
| std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples); | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| // Expect 6 samples for label 0 and 1 | |||
| int res[2] = {6, 6}; | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| rc = di.GetNextAsMap(&tensor_map); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| res[label]--; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_EQ(res[0], 0); | |||
| EXPECT_EQ(res[1], 0); | |||
| EXPECT_TRUE(i == 12); | |||
| } | |||
| } | |||
// WeightedRandomSampler with replacement drawing 12 of 44 samples; the draw order is
// random, so only the row count is verified.
TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) {
  // num samples to draw.
  int64_t num_samples = 12;
  int64_t total_samples = 44;
  int64_t samples_per_tensor = 10;
  // NOTE(review): std::rand() is evaluated once here, so all 44 entries share a single
  // weight value (effectively uniform sampling). Confirm whether distinct per-sample
  // random weights were intended.
  std::vector<double> weights(total_samples, std::rand() % 100);
  // create sampler with replacement = replacement
  std::shared_ptr<SamplerRT> sampler =
      std::make_shared<WeightedRandomSamplerRT>(weights, num_samples, true, samples_per_tensor);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    // Drain the pipeline, counting rows; labels are read but not asserted (random order).
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_TRUE(i == 12);
  }
}
| TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) { | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| std::map<std::string, int32_t> map; | |||
| map["class3"] = 333; | |||
| map["class1"] = 111; | |||
| map["wrong folder name"] = 1234; // this is skipped | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, nullptr, map)}); | |||
| int64_t res[2] = {111, 333}; | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_TRUE(label == res[i / 11]); | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 22); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) { | |||
| int64_t num_samples = 0; | |||
| std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(11, 10, false, num_samples); | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| auto op1 = ImageFolder(16, 2, 32, folder_path, false, std::move(sampler)); | |||
| auto op2 = Repeat(4); | |||
| op1->SetTotalRepeats(4); | |||
| op1->SetNumRepeatsPerEpoch(4); | |||
| auto tree = Build({op1, op2}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| rc = di.GetNextAsMap(&tensor_map); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_EQ(i % 4, label); | |||
| MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 16); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) { | |||
| int64_t num_samples = 0; | |||
| std::shared_ptr<SamplerRT> sampler = std::make_shared<PKSamplerRT>(3, false, num_samples, 4); | |||
| int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; // ground truth label | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_TRUE(res[i] == label); | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 12); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestImageFolderDecode) { | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| std::map<std::string, int32_t> map; | |||
| map["class3"] = 333; | |||
| map["class1"] = 111; | |||
| map["wrong folder name"] = 1234; // this is skipped | |||
| int64_t num_samples = 20; | |||
| int64_t start_index = 0; | |||
| auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(seq_sampler), map, true)}); | |||
| int64_t res[2] = {111, 333}; | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_TRUE(label == res[i / 11]); | |||
| EXPECT_TRUE(tensor_map["image"]->shape() == | |||
| TensorShape({2268, 4032, 3})); // verify shapes are correct after decode | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) { | |||
| int64_t num_samples = 5; | |||
| std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 0, false, num_samples); | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode | |||
| auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {})}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| int32_t labels[5] = {0, 0, 0, 1, 1}; | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| rc = di.GetNextAsMap(&tensor_map); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_EQ(labels[i], label); | |||
| MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 5); | |||
| } | |||
| } | |||
| TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) { | |||
| int64_t num_samples = 12; | |||
| std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 3, false, num_samples); | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data"; | |||
| // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode | |||
| auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {})}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| uint32_t labels[11] = {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; | |||
| EXPECT_TRUE(false); | |||
| } else { | |||
| DatasetIterator di(tree); | |||
| TensorMap tensor_map; | |||
| rc = di.GetNextAsMap(&tensor_map); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| uint64_t i = 0; | |||
| int32_t label = 0; | |||
| while (tensor_map.size() != 0) { | |||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | |||
| EXPECT_EQ(labels[i], label); | |||
| MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | |||
| i++; | |||
| ASSERT_OK(di.GetNextAsMap(&tensor_map)); | |||
| } | |||
| EXPECT_TRUE(i == 11); | |||
| } | |||
| } | |||
| @@ -25,6 +25,7 @@ | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "minddata/dataset/kernels/image/decode_op.h" | |||
| #include "minddata/dataset/kernels/image/resize_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "minddata/dataset/kernels/tensor_op.h" | |||
| #include "utils/log_adapter.h" | |||
| @@ -130,9 +131,21 @@ class MindDataTestMapOp : public UT::DatasetOpTesting { | |||
| std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, | |||
| bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr, | |||
| std::map<std::string, int32_t> map = {}, bool decode = false); | |||
| // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| std::map<std::string, int32_t> map = {}, bool decode = false) { | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| TensorShape scalar = TensorShape::CreateScalar(); | |||
| (void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); | |||
| (void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)); | |||
| std::set<std::string> ext = {".jpg", ".JPEG"}; | |||
| if (sampler == nullptr) { | |||
| int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data | |||
| int64_t start_index = 0; | |||
| sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); | |||
| } | |||
| std::shared_ptr<ImageFolderOp> so = | |||
| std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler); | |||
| return so; | |||
| } | |||
| // TestAsMap scenario: | |||
| // TFReaderOp reads a dataset that have column ordering |image|label|A|B|. | |||
| @@ -1,102 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
// Test fixture for ProjectOp tests; DatasetOpTesting supplies datasets_root_path_.
class MindDataTestProjectOp : public UT::DatasetOpTesting {};
| TEST_F(MindDataTestProjectOp, TestProjectProject) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| Status rc; | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| auto op_connector_size = config_manager->op_connector_size(); | |||
| auto num_workers = 1; // one file, one worker | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| std::vector<std::string> columns_to_load = {}; | |||
| std::vector<std::string> files = {dataset_path}; | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>( | |||
| num_workers, 16, 0, files, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op->Init(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // ProjectOp | |||
| std::vector<std::string> columns_to_project = {"col_sint16", "col_float", "col_2d"}; | |||
| std::shared_ptr<ProjectOp> my_project_op = std::make_shared<ProjectOp>(columns_to_project); | |||
| rc = my_tree->AssociateNode(my_project_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Set children/root layout. | |||
| rc = my_project_op->AddChild(my_tfreader_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_project_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(INFO) << "Launching tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(INFO) << "Row display for row #: " << row_count << "."; | |||
| ASSERT_EQ(tensor_list.size(), columns_to_project.size()); | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 12); | |||
| } | |||
| @@ -1,111 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <cstring> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/engine/datasetops/rename_op.h" | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "gtest/gtest.h" | |||
| #include "minddata/dataset/core/global_context.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
// Test fixture for RenameOp tests; DatasetOpTesting supplies datasets_root_path_.
class MindDataTestRenameOp : public UT::DatasetOpTesting {};
| TEST_F(MindDataTestRenameOp, TestRenameOpDefault) { | |||
| // Tree: | |||
| // | |||
| // | |||
| // OpId(2) RenameOp | |||
| // | | |||
| // OpId(0) TFReaderOp | |||
| // Start with an empty execution tree | |||
| Status rc; | |||
| MS_LOG(INFO) << "UT test TestRenameBasic."; | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| // Creating TFReaderOp | |||
| std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| int32_t op_connector_size = config_manager->op_connector_size(); | |||
| int32_t num_workers = 1; | |||
| int32_t worker_connector_size = 16; | |||
| std::vector<std::string> columns_to_load = {}; | |||
| std::vector<std::string> files = {dataset_path}; | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op = | |||
| std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, | |||
| columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op->Init(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Creating DatasetOp | |||
| std::vector<std::string> in_col_names = {"label"}; | |||
| std::vector<std::string> out_col_names = {"label1"}; | |||
| std::shared_ptr<RenameOp> rename_op = std::make_shared<RenameOp>(in_col_names, out_col_names); | |||
| rc = my_tree->AssociateNode(rename_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = rename_op->AddChild(std::move(my_tfreader_op)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(rename_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Prepare(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Launch the tree execution to kick off threads and start running the pipeline | |||
| MS_LOG(INFO) << "Launching my tree."; | |||
| rc = my_tree->Launch(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Simulate a parse of data from our pipeline. | |||
| std::shared_ptr<DatasetOp> root_node = my_tree->root(); | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(INFO) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 3); // Should be 3 rows fetched | |||
| } | |||
| @@ -56,55 +56,6 @@ std::shared_ptr<TextFileOp> TextFile(std::vector<std::string> text_files_list, i | |||
| return text_file_op; | |||
| } | |||
| TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) { | |||
| MS_LOG(INFO) << "Doing MindDataTestSentencePieceVocabOp TestSentencePieceFromDatasetFuntions."; | |||
| Status rc; | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/test_sentencepiece/botchan.txt"; | |||
| auto tree = std::make_shared<ExecutionTree>(); | |||
| std::shared_ptr<TextFileOp> file_op = TextFile({dataset_path}, 1, 2); | |||
| rc = tree->AssociateNode(file_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| std::vector<std::string> cols; | |||
| std::unordered_map<std::string, std::string> m_params; | |||
| std::shared_ptr<SentencePieceVocab> spm = std::make_unique<SentencePieceVocab>(); | |||
| // Sample construstructor for reference | |||
| // BuildSentencePieceVocabOp(std::shared_ptr<SentencePieceVocab> vocab, std::vector<std::string> col_names, | |||
| // int32_t vocab_size, float character_coverage, SentencePieceModel model_type, | |||
| // const std::unordered_map<std::string, std::string> ¶ms, int32_t op_conn_size); | |||
| std::shared_ptr<BuildSentencePieceVocabOp> spv_op = std::make_shared<BuildSentencePieceVocabOp>( | |||
| std::move(spm), cols, 5000, 0.9995, SentencePieceModel::kUnigram, m_params, 2); | |||
| rc = tree->AssociateNode(spv_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = spv_op->AddChild(file_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| file_op->SetTotalRepeats(1); | |||
| file_op->SetNumRepeatsPerEpoch(1); | |||
| rc = tree->AssignRoot(spv_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| while (!tensor_list.empty()) { | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| } | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| } | |||
| TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) { | |||
| MS_LOG(INFO) << "Doing MindDataTestSentencePieceVocabOp TestSentencePieceFromFileFuntions."; | |||
| @@ -117,74 +68,3 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) { | |||
| Status rc = SentencePieceVocab::BuildFromFile(path_list, 5000, 0.9995, SentencePieceModel::kUnigram, param_map, &spm); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| } | |||
| TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) { | |||
| MS_LOG(INFO) << "Doing MindDataTestSentencePieceVocabOp TestSentencePieceTokenizerFuntions."; | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/test_sentencepiece/botchan.txt"; | |||
| auto tree = std::make_shared<ExecutionTree>(); | |||
| std::shared_ptr<TextFileOp> file_op = TextFile({dataset_path}, 1, 2); | |||
| Status rc = tree->AssociateNode(file_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| std::shared_ptr<SentencePieceVocab> spm = std::make_unique<SentencePieceVocab>(); | |||
| std::vector<std::string> cols; | |||
| std::unordered_map<std::string, std::string> m_params; | |||
| std::shared_ptr<BuildSentencePieceVocabOp> spv_op = std::make_shared<BuildSentencePieceVocabOp>( | |||
| spm, cols, 5000, 0.9995, SentencePieceModel::kUnigram, m_params, 2); | |||
| rc = tree->AssociateNode(spv_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = spv_op->AddChild(file_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| file_op->SetTotalRepeats(1); | |||
| file_op->SetNumRepeatsPerEpoch(1); | |||
| rc = tree->AssignRoot(spv_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| while (!tensor_list.empty()) { | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| } | |||
| std::shared_ptr<Tensor> output_tensor; | |||
| std::unique_ptr<SentencePieceTokenizerOp> op( | |||
| new SentencePieceTokenizerOp(spm, SPieceTokenizerLoadType::kModel, SPieceTokenizerOutType::kString)); | |||
| std::shared_ptr<Tensor> input_tensor; | |||
| Tensor::CreateScalar<std::string>("I saw a girl with a telescope.", &input_tensor); | |||
| Status s = op->Compute(input_tensor, &output_tensor); | |||
| std::vector<std::string> expect; | |||
| expect.push_back("▁I"); | |||
| expect.push_back("▁sa"); | |||
| expect.push_back("w"); | |||
| expect.push_back("▁a"); | |||
| expect.push_back("▁girl"); | |||
| expect.push_back("▁with"); | |||
| expect.push_back("▁a"); | |||
| expect.push_back("▁te"); | |||
| expect.push_back("les"); | |||
| expect.push_back("co"); | |||
| expect.push_back("pe"); | |||
| expect.push_back("."); | |||
| ASSERT_TRUE(output_tensor->Size() == expect.size()); | |||
| for (int i = 0; i < output_tensor->Size(); i++) { | |||
| std::string_view str; | |||
| output_tensor->GetItemAt(&str, {i}); | |||
| std::string sentence{str}; | |||
| ASSERT_TRUE(sentence == expect[i]); | |||
| } | |||
| } | |||
| @@ -1,95 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/util/circular_pool.h" | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "common/common.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestSkipOp : public UT::DatasetOpTesting {}; | |||
| TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| Status rc; | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| int32_t op_connector_size = config_manager->op_connector_size(); | |||
| int32_t num_workers = config_manager->num_parallel_workers(); | |||
| int32_t worker_connector_size = 16; | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| std::vector<std::string> columns_to_load = {}; | |||
| std::vector<std::string> files = {dataset_path}; | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op = | |||
| std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, | |||
| columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op->Init(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // SkipOp | |||
| std::shared_ptr<SkipOp> skip_op = std::make_shared<SkipOp>(5); | |||
| rc = my_tree->AssociateNode(skip_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Set children/root layout. | |||
| rc = skip_op->AddChild(my_tfreader_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(skip_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(INFO) << "Launching tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(INFO) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 7); | |||
| } | |||
| @@ -1,101 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestTakeOp : public UT::DatasetOpTesting {}; | |||
| TEST_F(MindDataTestTakeOp, TestTakeProject) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| Status rc; | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; | |||
| // TFReaderOp | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| auto num_workers = 1; | |||
| auto op_connector_size = config_manager->op_connector_size(); | |||
| std::vector<std::string> columns_to_load = {}; | |||
| std::vector<std::string> files = {dataset_path}; | |||
| // worker connector size = 16 | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>( | |||
| num_workers, 16, 0, files, std::make_unique<DataSchema>(), op_connector_size, columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op->Init(); | |||
| ASSERT_OK(rc); | |||
| // TakeOp | |||
| std::shared_ptr<TakeOp> my_take_op = std::make_shared<TakeOp>(5); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| ASSERT_OK(rc); | |||
| rc = my_tree->AssociateNode(my_take_op); | |||
| ASSERT_OK(rc); | |||
| // Set children/root layout. | |||
| rc = my_take_op->AddChild(my_tfreader_op); | |||
| ASSERT_OK(rc); | |||
| rc = my_tree->AssignRoot(my_take_op); | |||
| ASSERT_OK(rc); | |||
| MS_LOG(DEBUG) << "Launching tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_OK(rc); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_OK(rc); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_OK(rc); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_OK(rc); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 5); | |||
| } | |||
| @@ -1,128 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "minddata/dataset/engine/data_schema.h" | |||
| #include "minddata/dataset/engine/datasetops/source/text_file_op.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestTextFileOp : public UT::DatasetOpTesting {}; | |||
| TEST_F(MindDataTestTextFileOp, TestTextFileBasic) { | |||
| // Start with an empty execution tree | |||
| auto tree = std::make_shared<ExecutionTree>(); | |||
| Status rc; | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testTextFileDataset/1.txt"; | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| int32_t num_workers = 1; // Only one file | |||
| int32_t op_connector_size = 2; | |||
| int32_t worker_connector_size = config_manager->worker_connector_size(); | |||
| int64_t total_rows = 0; // read all rows | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| rc = schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); | |||
| ASSERT_OK(rc); | |||
| std::vector<std::string> files = {dataset_path}; | |||
| bool shuffle_files = false; | |||
| int32_t num_devices = 1; | |||
| int32_t device_id = 0; | |||
| std::shared_ptr<TextFileOp> op = | |||
| std::make_shared<TextFileOp>(num_workers, total_rows, worker_connector_size, std::move(schema), files, | |||
| op_connector_size, shuffle_files, num_devices, device_id); | |||
| rc = op->Init(); | |||
| ASSERT_OK(rc); | |||
| rc = tree->AssociateNode(op); | |||
| ASSERT_OK(rc); | |||
| rc = tree->AssignRoot(op); | |||
| ASSERT_OK(rc); | |||
| MS_LOG(INFO) << "Launching tree and begin iteration."; | |||
| rc = tree->Prepare(); | |||
| ASSERT_OK(rc); | |||
| rc = tree->Launch(); | |||
| ASSERT_OK(rc); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_OK(rc); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_OK(rc); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 3); | |||
| } | |||
| TEST_F(MindDataTestTextFileOp, TestTotalRows) { | |||
| std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt"; | |||
| std::string tf_file2 = datasets_root_path_ + "/testTextFileDataset/2.txt"; | |||
| std::vector<std::string> files; | |||
| files.push_back(tf_file1); | |||
| int64_t total_rows = 0; | |||
| TextFileOp::CountAllFileRows(files, &total_rows); | |||
| ASSERT_EQ(total_rows, 3); | |||
| files.clear(); | |||
| files.push_back(tf_file2); | |||
| TextFileOp::CountAllFileRows(files, &total_rows); | |||
| ASSERT_EQ(total_rows, 2); | |||
| files.clear(); | |||
| files.push_back(tf_file1); | |||
| files.push_back(tf_file2); | |||
| TextFileOp::CountAllFileRows(files, &total_rows); | |||
| ASSERT_EQ(total_rows, 5); | |||
| files.clear(); | |||
| } | |||
| TEST_F(MindDataTestTextFileOp, TestTotalRowsFileNotExist) { | |||
| std::string tf_file1 = datasets_root_path_ + "/does/not/exist/0.txt"; | |||
| std::vector<std::string> files; | |||
| files.push_back(tf_file1); | |||
| int64_t total_rows = 0; | |||
| TextFileOp::CountAllFileRows(files, &total_rows); | |||
| ASSERT_EQ(total_rows, 0); | |||
| } | |||
| @@ -1,115 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/engine/datasetops/source/voc_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "minddata/dataset/include/dataset/datasets.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "securec.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| class MindDataTestVOCOp : public UT::DatasetOpTesting { | |||
| protected: | |||
| }; | |||
| TEST_F(MindDataTestVOCOp, TestVOCDetection) { | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testVOC2012"; | |||
| std::shared_ptr<Dataset> ds = | |||
| VOC(dataset_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 0)); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| int row_count = 0; | |||
| while (row.size() != 0) { | |||
| auto image = row["image"]; | |||
| auto label = row["label"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); | |||
| MS_LOG(INFO) << "Tensor label shape: " << label.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 9); | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestVOCOp, TestVOCSegmentation) { | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testVOC2012"; | |||
| std::shared_ptr<Dataset> ds = | |||
| VOC(dataset_path, "Segmentation", "train", {}, false, std::make_shared<SequentialSampler>(0, 0)); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| int row_count = 0; | |||
| while (!row.empty()) { | |||
| auto image = row["image"]; | |||
| auto target = row["target"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); | |||
| MS_LOG(INFO) << "Tensor target shape: " << target.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 10); | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestVOCOp, TestVOCClassIndex) { | |||
| std::string dataset_path; | |||
| dataset_path = datasets_root_path_ + "/testVOC2012"; | |||
| std::map<std::string, int32_t> class_index; | |||
| class_index["car"] = 0; | |||
| class_index["cat"] = 1; | |||
| class_index["train"] = 5; | |||
| std::shared_ptr<Dataset> ds = | |||
| VOC(dataset_path, "Detection", "train", class_index, false, std::make_shared<SequentialSampler>(0, 0)); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| int row_count = 0; | |||
| while (!row.empty()) { | |||
| auto image = row["image"]; | |||
| auto label = row["label"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); | |||
| MS_LOG(INFO) << "Tensor label shape: " << label.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 6); | |||
| iter->Stop(); | |||
| } | |||
| @@ -1,214 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <chrono> | |||
| #include <cstring> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "minddata/dataset/core/client.h" | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/core/config_manager.h" | |||
| #include "minddata/dataset/engine/datasetops/zip_op.h" | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "common/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestZipOp : public UT::DatasetOpTesting {}; | |||
| TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { | |||
| /* Tree: | |||
| * | |||
| * | |||
| * OpId(2) ZipOp | |||
| * / \ | |||
| * OpId(0) TFReaderOp OpId(1) TFReaderOp | |||
| * Start with an empty execution tree | |||
| */ | |||
| Status rc; | |||
| MS_LOG(INFO) << "UT test TestZipBasic."; | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| // Creating TFReaderOp | |||
| std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; | |||
| std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| std::vector<std::string> columns_to_load = {}; | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| std::vector<std::string> files1 = {dataset_path}; | |||
| auto op_connector_size = config_manager->op_connector_size(); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>( | |||
| 1, 16, 0, files1, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op->Init(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::vector<std::string> files2 = {dataset_path2}; | |||
| std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>(); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op2 = std::make_shared<TFReaderOp>( | |||
| 1, 1, 0, files2, std::make_unique<DataSchema>(), op_connector_size, columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op2->Init(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op2); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Creating DatasetOp | |||
| std::shared_ptr<ZipOp> zip_op = std::make_shared<ZipOp>(); | |||
| rc = my_tree->AssociateNode(zip_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = zip_op->AddChild(std::move(my_tfreader_op)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = zip_op->AddChild(std::move(my_tfreader_op2)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(zip_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Prepare(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Launch the tree execution to kick off threads and start running the pipeline | |||
| MS_LOG(INFO) << "Launching my tree."; | |||
| rc = my_tree->Launch(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Simulate a parse of data from our pipeline. | |||
| std::shared_ptr<DatasetOp> rootNode = my_tree->root(); | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(INFO) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 3); // Should be 3 rows fetched | |||
| } | |||
| TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { | |||
| /* Tree: | |||
| * OpId(3) Repeat(3) | |||
| * | |||
| * OpId(2) ZipOp | |||
| * / \ | |||
| * OpId(0) TFReaderOp OpId(1) TFReaderOp | |||
| * | |||
| * Start with an empty execution tree | |||
| */ | |||
| Status rc; | |||
| MS_LOG(INFO) << "UT test TestZipRepeat."; | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| uint32_t num_repeats = 3; | |||
| std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; | |||
| std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| auto op_connector_size = config_manager->op_connector_size(); | |||
| std::vector<std::string> columns_to_load = {}; | |||
| std::vector<std::string> files1 = {dataset_path}; | |||
| std::unique_ptr<DataSchema> schema1 = std::make_unique<DataSchema>(); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>( | |||
| 1, 16, 0, files1, std::move(schema1), op_connector_size, columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op->Init(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::vector<std::string> files2 = {dataset_path2}; | |||
| std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>(); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op2 = std::make_shared<TFReaderOp>( | |||
| 1, 1, 0, files2, std::move(schema2), op_connector_size, columns_to_load, false, 1, 0, false); | |||
| rc = my_tfreader_op2->Init(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op2); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Creating DatasetOp | |||
| std::shared_ptr<ZipOp> zip_op = std::make_shared<ZipOp>(); | |||
| rc = my_tree->AssociateNode(zip_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| my_tfreader_op->SetTotalRepeats(num_repeats); | |||
| my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats); | |||
| rc = zip_op->AddChild(std::move(my_tfreader_op)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| my_tfreader_op2->SetTotalRepeats(num_repeats); | |||
| my_tfreader_op2->SetNumRepeatsPerEpoch(num_repeats); | |||
| rc = zip_op->AddChild(std::move(my_tfreader_op2)); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats); | |||
| rc = my_tree->AssociateNode(my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| zip_op->SetTotalRepeats(num_repeats); | |||
| zip_op->SetNumRepeatsPerEpoch(num_repeats); | |||
| rc = my_repeat_op->AddChild(zip_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Prepare(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Launch the tree execution to kick off threads and start running the pipeline | |||
| MS_LOG(INFO) << "Launching my tree."; | |||
| rc = my_tree->Launch(); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Simulate a parse of data from our pipeline. | |||
| std::shared_ptr<DatasetOp> rootNode = my_tree->root(); | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(INFO) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 9); // Should be 9 rows fetched | |||
| } | |||