Browse Source

remove old tests and move coverage

tags/v1.6.0
hetshah 4 years ago
parent
commit
5c703015e4
17 changed files with 449 additions and 1555 deletions
  1. +0
    -19
      tests/ut/cpp/dataset/CMakeLists.txt
  2. +230
    -9
      tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
  3. +15
    -0
      tests/ut/cpp/dataset/c_api_dataset_textfile_test.cc
  4. +52
    -0
      tests/ut/cpp/dataset/c_api_dataset_voc_test.cc
  5. +136
    -0
      tests/ut/cpp/dataset/c_api_samplers_test.cc
  6. +0
    -29
      tests/ut/cpp/dataset/execution_tree_test.cc
  7. +0
    -53
      tests/ut/cpp/dataset/filter_op_test.cc
  8. +0
    -456
      tests/ut/cpp/dataset/image_folder_op_test.cc
  9. +16
    -3
      tests/ut/cpp/dataset/map_op_test.cc
  10. +0
    -102
      tests/ut/cpp/dataset/project_op_test.cc
  11. +0
    -111
      tests/ut/cpp/dataset/rename_op_test.cc
  12. +0
    -120
      tests/ut/cpp/dataset/sentence_piece_vocab_op_test.cc
  13. +0
    -95
      tests/ut/cpp/dataset/skip_op_test.cc
  14. +0
    -101
      tests/ut/cpp/dataset/take_op_test.cc
  15. +0
    -128
      tests/ut/cpp/dataset/text_file_op_test.cc
  16. +0
    -115
      tests/ut/cpp/dataset/voc_op_test.cc
  17. +0
    -214
      tests/ut/cpp/dataset/zip_op_test.cc

+ 0
- 19
tests/ut/cpp/dataset/CMakeLists.txt View File

@@ -3,7 +3,6 @@ include(GoogleTest)
SET(DE_UT_SRCS
affine_op_test.cc
execute_test.cc
album_op_test.cc
arena_test.cc
auto_contrast_op_test.cc
batch_op_test.cc
@@ -60,18 +59,13 @@ SET(DE_UT_SRCS
c_api_vision_soft_dvpp_test.cc
c_api_vision_uniform_aug_test.cc
c_api_vision_vertical_flip_test.cc
celeba_op_test.cc
center_crop_op_test.cc
channel_swap_test.cc
cifar_op_test.cc
circular_pool_test.cc
client_config_test.cc
clue_op_test.cc
coco_op_test.cc
common/bboxop_common.cc
common/common.cc
common/cvop_common.cc
concat_op_test.cc
concatenate_op_test.cc
connector_test.cc
csv_op_test.cc
@@ -88,7 +82,6 @@ SET(DE_UT_SRCS
c_api_vision_gaussian_blur_test.cc
global_context_test.cc
gnn_graph_test.cc
image_folder_op_test.cc
image_process_test.cc
interrupt_test.cc
ir_callback_test.cc
@@ -104,7 +97,6 @@ SET(DE_UT_SRCS
memory_pool_test.cc
mind_record_op_test.cc
mixup_batch_op_test.cc
mnist_op_test.cc
normalize_op_test.cc
one_hot_op_test.cc
optimization_pass_test.cc
@@ -112,7 +104,6 @@ SET(DE_UT_SRCS
pad_op_test.cc
path_test.cc
perf_data_test.cc
project_op_test.cc
queue_test.cc
random_affine_op_test.cc
random_color_adjust_op_test.cc
@@ -131,16 +122,12 @@ SET(DE_UT_SRCS
random_solarize_op_test.cc
random_vertical_flip_op_test.cc
random_vertical_flip_with_bbox_op_test.cc
rename_op_test.cc
rescale_op_test.cc
resize_op_test.cc
resize_with_bbox_op_test.cc
rgba_to_bgr_op_test.cc
rgba_to_rgb_op_test.cc
schema_test.cc
sentence_piece_vocab_op_test.cc
shuffle_op_test.cc
skip_op_test.cc
slice_op_test.cc
sliding_window_op_test.cc
solarize_op_test.cc
@@ -150,13 +137,11 @@ SET(DE_UT_SRCS
subset_random_sampler_test.cc
subset_sampler_test.cc
swap_red_blue_test.cc
take_op_test.cc
task_manager_test.cc
tensor_row_test.cc
tensor_string_test.cc
tensor_test.cc
tensorshape_test.cc
text_file_op_test.cc
tfReader_op_test.cc
to_float16_op_test.cc
tokenizer_op_test.cc
@@ -165,16 +150,12 @@ SET(DE_UT_SRCS
trucate_pair_test.cc
type_cast_op_test.cc
weighted_random_sampler_test.cc
zip_op_test.cc
)

if(ENABLE_PYTHON)
set(DE_UT_SRCS
${DE_UT_SRCS}
filter_op_test.cc
manifest_op_test.cc
voc_op_test.cc
sentence_piece_vocab_op_test.cc
)
endif()



+ 230
- 9
tests/ut/cpp/dataset/c_api_dataset_ops_test.cc View File

@@ -779,21 +779,20 @@ TEST_F(MindDataTestPipeline, TestFilterFail3) {
EXPECT_EQ(iter, nullptr);
}

TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat.";

void ImageFolderBatchAndRepeat(int32_t repeat_count, int32_t batch_size, int64_t num_samples,
bool replacement, std::string datasets_root_path) {
// Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
std::string folder_path = datasets_root_path + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true,
std::make_shared<RandomSampler>(replacement, num_samples));
uint64_t ds_size = 44;
EXPECT_NE(ds, nullptr);

// Create a Repeat operation on ds
int32_t repeat_num = 2;
ds = ds->Repeat(repeat_num);
ds = ds->Repeat(repeat_count);
EXPECT_NE(ds, nullptr);

// Create a Batch operation on ds
int32_t batch_size = 2;
ds = ds->Batch(batch_size);
EXPECT_NE(ds, nullptr);

@@ -814,12 +813,35 @@ TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
ASSERT_OK(iter->GetNextRow(&row));
}

EXPECT_EQ(i, 10);
uint64_t expect = 0;
if (batch_size != 0) {
if (num_samples == 0) {
expect = ds_size * repeat_count / batch_size;
} else {
expect = num_samples * repeat_count / batch_size;
}
} else {
expect = 0;
}
EXPECT_EQ(i, expect);

// Manually terminate the pipeline
iter->Stop();
}

// Feature: Test ImageFolder with Batch and Repeat operations
// Description: Perform Repeat and Batch ops with varying parameters,
// iterate through dataset and count rows
// Expectation: Number of rows should be equal to the size of the dataset/num_samples
// times the repeat_count divided by the batch_size
TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat.";
ImageFolderBatchAndRepeat(2, 2, 10, false, datasets_root_path_);
ImageFolderBatchAndRepeat(2, 11, 0, false, datasets_root_path_);
ImageFolderBatchAndRepeat(3, 2, 12, true, datasets_root_path_);
}

TEST_F(MindDataTestPipeline, TestPipelineGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPipelineGetDatasetSize.";

@@ -1997,3 +2019,202 @@ TEST_F(MindDataTestPipeline, TestConcatTFRecord) {
// Manually terminate the pipeline
iter->Stop();
}

// Feature: Test ImageFolder with Sequential Sampler and Decode
// Description: Create ImageFolder dataset with decode=true, iterate through dataset and count rows
// Expectation: There should be 20 rows in the dataset (# of samples taken)
TEST_F(MindDataTestPipeline, TestImageFolderDecode) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderDecode.";

  // Sequentially sample the first 20 images (start_index=0, num_samples=20)
  std::shared_ptr<Sampler> seq_sampler = std::make_shared<SequentialSampler>(0, 20);
  EXPECT_NE(seq_sampler, nullptr);

  // Build an ImageFolder pipeline over testPK with decode enabled
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, seq_sampler);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Drain the pipeline, counting rows until an empty row signals the end
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Exactly the 20 sampled images should come back
  EXPECT_EQ(num_rows, 20);
  iter->Stop();
}

// Feature: Test TFRecord with Take operation
// Description: Perform Take operation with count = 5, iterate through dataset and count rows
// Expectation: There should be 5 rows in the dataset
TEST_F(MindDataTestPipeline, TestTFRecordTake) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordTake.";

  // Build a TFRecord dataset from the all-types test file and its schema
  std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path);
  EXPECT_NE(ds, nullptr);

  // Keep only the first 5 rows of the dataset
  ds = ds->Take(5);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Drain the pipeline, counting rows until an empty row signals the end
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << num_rows;

  // Take(5) must yield exactly 5 rows
  EXPECT_EQ(num_rows, 5);

  // Manually terminate the pipeline
  iter->Stop();
}

// Feature: Test Skip operation on TFRecord dataset
// Description: Perform skip operation with count = 5, iterate through dataset and count rows
// Expectation: There should be 7 rows, (12 rows initially and 5 are skipped)
TEST_F(MindDataTestPipeline, TestTFRecordSkip) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordSkip.";

  // Create a TFRecord Dataset
  std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path);
  EXPECT_NE(ds, nullptr);

  // Create a Skip operation on ds (comment previously said "Take" — copy-paste error)
  ds = ds->Skip(5);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 7 rows (12 total rows in the file, first 5 skipped)
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}

// Feature: Test Rename operation on TFRecord
// Description: Rename columns in dataset, iterate through dataset and count rows
// Expectation: The columns should have a new name after the Rename op and there should be 3 rows in the dataset
TEST_F(MindDataTestPipeline, TestTFRecordRename) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordRename.";

  // Build a TFRecord dataset over the 3-image test file
  std::string file_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::shared_ptr<Dataset> ds = TFRecord({file_path});
  EXPECT_NE(ds, nullptr);

  // Rename twice: label -> label1 -> label2, and image -> image1
  ds = ds->Rename({"label"}, {"label1"});
  ds = ds->Rename({"label1", "image"}, {"label2", "image1"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Only the final renamed column names may be present; every
  // intermediate or original name must be gone
  EXPECT_NE(row.find("label2"), row.end());
  EXPECT_NE(row.find("image1"), row.end());
  EXPECT_EQ(row.find("image"), row.end());
  EXPECT_EQ(row.find("label"), row.end());
  EXPECT_EQ(row.find("label1"), row.end());

  // Drain the pipeline, counting rows until an empty row signals the end
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // The source file holds 3 images, so 3 rows are expected
  EXPECT_EQ(num_rows, 3);

  // Manually terminate the pipeline
  iter->Stop();
}

// Feature: Test TFRecord with Zip and Repeat operation
// Description: Create two datasets and apply Zip operation on them.
// Apply Repeat operation on resulting dataset and count rows
// Expectation: There should be 9 rows in the dataset
TEST_F(MindDataTestPipeline, TestTFRecordZip) {
  // Testing the member zip() function
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordZip.";

  // First TFRecord dataset: the 3-image test file
  std::string file_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::shared_ptr<Dataset> ds = TFRecord({file_path});
  EXPECT_NE(ds, nullptr);

  // Second TFRecord dataset: the batch test file
  std::string file_path1 = datasets_root_path_ + "/testBatchDataset/test.data";
  std::shared_ptr<Dataset> ds1 = TFRecord({file_path1});
  EXPECT_NE(ds1, nullptr);

  // Zip the two datasets column-wise
  ds = ds->Zip({ds1});
  EXPECT_NE(ds, nullptr);

  // Repeat the zipped dataset 3 times
  int32_t repeat_num = 3;
  ds = ds->Repeat(repeat_num);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Drain the pipeline, counting rows until an empty row signals the end
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Zip is bounded by the shorter input (3 rows), times 3 repeats = 9
  EXPECT_EQ(num_rows, 9);

  // Manually terminate the pipeline
  iter->Stop();
}

+ 15
- 0
tests/ut/cpp/dataset/c_api_dataset_textfile_test.cc View File

@@ -283,6 +283,21 @@ TEST_F(MindDataTestPipeline, TestTextFileDatasetFail7) {
EXPECT_EQ(iter, nullptr);
}

// Feature: Test Textfile dataset
// Description: Create TextFile dataset with a file that does not exist and check the size of the dataset
// Expectation: The dataset should have size 0
TEST_F(MindDataTestPipeline, TestTextFileFileNotExist) {
  // Log banner fixed to match the test name (was "TestTextFileDatasetFail8")
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileFileNotExist.";

  // Create a TextFile Dataset
  // with non-existent dataset_files input
  std::string tf_file1 = datasets_root_path_ + "/does/not/exist/0.txt";
  std::shared_ptr<Dataset> ds = TextFile({tf_file1});
  // Dataset object construction still succeeds; the missing file
  // only surfaces when the dataset size is queried
  EXPECT_NE(ds, nullptr);

  // A non-existent input file yields a dataset of size 0
  EXPECT_EQ(ds->GetDatasetSize(), 0);
}

TEST_F(MindDataTestPipeline, TestTextFileDatasetShuffleFalse1A) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileDatasetShuffleFalse1A.";
// Test TextFile Dataset with two text files and no shuffle, num_parallel_workers=1


+ 52
- 0
tests/ut/cpp/dataset/c_api_dataset_voc_test.cc View File

@@ -166,6 +166,32 @@ TEST_F(MindDataTestPipeline, TestVOCDetection) {
iter->Stop();
}

// Feature: Test VOC dataset with detection task
// Description: Create VOC dataset with task="Detection" and count rows
// Expectation: There should be 9 rows
TEST_F(MindDataTestPipeline, TestVOCDetection1) {
  // Standard log banner, added for consistency with the sibling tests
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCDetection1.";

  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testVOC2012";
  // SequentialSampler(0, 0): start at index 0, num_samples=0 means "all samples"
  std::shared_ptr<Dataset> ds =
    VOC(dataset_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 0));
  EXPECT_NE(ds, nullptr);
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  // Count every row produced by the pipeline
  int row_count = 0;
  while (row.size() != 0) {
    auto image = row["image"];
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    MS_LOG(INFO) << "Tensor label shape: " << label.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
    row_count++;
  }
  // The Detection split of testVOC2012 contains 9 samples
  ASSERT_EQ(row_count, 9);
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrModeError1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCInvalidTaskOrModeError1.";

@@ -235,6 +261,32 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentation) {
iter->Stop();
}

// Feature: Test VOC dataset with Segmentation task
// Description: Create VOC dataset with task="Segmentation" and count rows
// Expectation: There should be 10 rows
TEST_F(MindDataTestPipeline, TestVOCSegmentation1) {
  // Standard log banner, added for consistency with the sibling tests
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentation1.";

  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testVOC2012";
  // SequentialSampler(0, 0): start at index 0, num_samples=0 means "all samples"
  std::shared_ptr<Dataset> ds =
    VOC(dataset_path, "Segmentation", "train", {}, false, std::make_shared<SequentialSampler>(0, 0));
  EXPECT_NE(ds, nullptr);
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  // Count every row produced by the pipeline
  int row_count = 0;
  while (!row.empty()) {
    auto image = row["image"];
    auto target = row["target"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    MS_LOG(INFO) << "Tensor target shape: " << target.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
    row_count++;
  }
  // The Segmentation split of testVOC2012 contains 10 samples
  ASSERT_EQ(row_count, 10);
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestVOCSegmentationError2) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentationError2.";



+ 136
- 0
tests/ut/cpp/dataset/c_api_samplers_test.cc View File

@@ -88,6 +88,43 @@ TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
iter->Stop();
}

// Feature: Test ImageFolder with WeightedRandomSampler
// Description: Create ImageFolder dataset with WeightedRandomSampler given num_samples=12,
// iterate through dataset and count rows
// Expectation: There should be 12 rows
TEST_F(MindDataTestPipeline, TestWeightedRandomSamplerImageFolder) {
  // Standard log banner, added for consistency with the sibling tests
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWeightedRandomSamplerImageFolder.";

  // Per-sample weights; draw 12 samples according to these weights
  std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  std::shared_ptr<Sampler> sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // num_samples=12 must yield exactly 12 rows
  EXPECT_EQ(i, 12);

  // Manually terminate the pipeline
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
// Test building a dataset with no sampler provided (defaults to random sampler
@@ -234,6 +271,74 @@ TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) {
iter->Stop();
}

// Feature: Test ImageFolder with DistributedSampler
// Description: Create ImageFolder dataset with DistributedSampler given num_shards=11 and shard_id=10,
// count rows in dataset
// Expectation: There should be 4 rows (44 rows in original data/11 = 4)
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess5) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess5.";
  // Test basic setting of distributed_sampler

  // num_shards=11, shard_id=10, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  // (argument comment fixed: "num_samplers" was a typo for num_samples)
  std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(11, 10, false, 0, 0, -1, true);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  EXPECT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    // touch the "label" column to verify it is present in every row
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 44 images split across 11 shards -> 4 rows for this shard
  EXPECT_EQ(i, 4);
  iter->Stop();
}

// Feature: Test ImageFolder with DistributedSampler
// Description: Create ImageFolder dataset with DistributedSampler given num_shards=4 and shard_id=3,
// count rows in dataset
// Expectation: There should be 11 rows (44 rows in original data/4 = 11)
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess6) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess6.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=3, shuffle=false, num_samples=12, seed=0, offset=-1, even_dist=true
  // (argument comment fixed: "num_samplers" was a typo for num_samples)
  std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 3, false, 12, 0, -1, true);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  EXPECT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    // touch the "label" column to verify it is present in every row
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 44 images split across 4 shards -> 11 rows for this shard
  EXPECT_EQ(i, 11);
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
// Test basic setting of distributed_sampler
@@ -441,3 +546,34 @@ TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) {

iter->Stop();
}

// Feature: Test ImageFolder with PKSampler
// Description: Create ImageFolder dataset with PKSampler given num_val=3 and count rows
// (comment fixed: it previously said "DistributedSampler", but this test uses PKSampler)
// Expectation: There should be 12 rows
TEST_F(MindDataTestPipeline, TestPKSamplerImageFolder) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPKSamplerImageFolder.";

  // PKSampler(3, false): sample 3 elements per class, without shuffling
  std::shared_ptr<Sampler> sampler = std::make_shared<PKSampler>(3, false);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  EXPECT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 4 classes * 3 samples per class = 12 rows
  EXPECT_EQ(i, 12);
  iter->Stop();
}

+ 0
- 29
tests/ut/cpp/dataset/execution_tree_test.cc View File

@@ -67,17 +67,6 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) {
my_tree->AssignRoot(root_op);
root_op->AddChild(parent_op);
ASSERT_NE(root_op, nullptr);
// Testing Iterator
MS_LOG(INFO) << "Testing Tree Iterator from root.";
for (auto itr = my_tree->begin(); itr != my_tree->end(); ++itr) {
itr->Print(std::cout, false);
}
MS_LOG(INFO) << "Finished testing Tree Iterator from root.";
MS_LOG(INFO) << "Testing Tree Iterator from parentOp.";
for (auto itr = my_tree->begin(parent_op); itr != my_tree->end(); ++itr) {
itr->Print(std::cout, false);
}
MS_LOG(INFO) << "Finished testing Tree Iterator from parentOp.";

// At this point, since move semantic was used,
// I don't have any operator access myself now.
@@ -120,22 +109,4 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree2) {
MS_LOG(INFO) << "Launching my tree.";
rc = my_tree->Launch();
ASSERT_OK(rc);

// Simulate a parse of data from our pipeline.
std::shared_ptr<DatasetOp> root_node = my_tree->root();

// Start the loop of reading from our pipeline using iterator
MS_LOG(INFO) << "Testing DatasetIterator in testTree2.";
DatasetIterator di(my_tree);
TensorRow buffer;
rc = di.FetchNextTensorRow(&buffer);
EXPECT_TRUE(rc.IsOk());

while (!buffer.empty()) {
rc = di.FetchNextTensorRow(&buffer);
EXPECT_TRUE(rc.IsOk());
}
}

// Construct some tree nodes and play with them
TEST_F(MindDataTestExecutionTree, TestExecutionTree3) { MS_LOG(INFO) << "Doing MindDataTestExecutionTree3."; }

+ 0
- 53
tests/ut/cpp/dataset/filter_op_test.cc View File

@@ -1,53 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/util/circular_pool.h"
#include "minddata/dataset/core/client.h"
#include "common/common.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"

using namespace mindspore::dataset;
namespace de = mindspore::dataset;

using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

class MindDataTestfilter_op : public UT::DatasetOpTesting {};

std::shared_ptr<de::FilterOp> Filter() {
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
int32_t op_connector_size = config_manager->op_connector_size();
int32_t num_workers = config_manager->num_parallel_workers();
std::shared_ptr<TensorOp> predicate_func;
std::vector<std::string> in_col_names = {};
std::shared_ptr<de::FilterOp> op =
std::make_shared<FilterOp>(in_col_names, num_workers, op_connector_size, predicate_func);
return op;
}

TEST_F(MindDataTestfilter_op, Testfilter_opFuntions) {
MS_LOG(INFO) << "Doing MindDataTest filter_op.";
auto my_tree = std::make_shared<ExecutionTree>();

std::shared_ptr<DatasetOp> parent_op = Filter();

std::shared_ptr<DatasetOp> leaf_op = Filter();
my_tree->AssociateNode(parent_op);
my_tree->AssociateNode(leaf_op);
ASSERT_NE(parent_op, nullptr);
ASSERT_NE(leaf_op, nullptr);
}

+ 0
- 456
tests/ut/cpp/dataset/image_folder_op_test.cc View File

@@ -1,456 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;

// std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);

// std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);

// std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);

std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path,
bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr,
std::map<std::string, int32_t> map = {}, bool decode = false) {
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
TensorShape scalar = TensorShape::CreateScalar();
(void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
(void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar));
std::set<std::string> ext = {".jpg", ".JPEG"};
if (sampler == nullptr) {
int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data
int64_t start_index = 0;
sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
}
std::shared_ptr<ImageFolderOp> so =
std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler);
return so;
}

Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr,
DataType::Type data_type = DataType::DE_UINT32) {
TensorShape shape(std::vector<int64_t>(1, num_elements));
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(data_type), data, sample_ids));

return Status::OK();
}

class MindDataTestImageFolderSampler : public UT::DatasetOpTesting {
protected:
};

TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeat) {
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto op1 = ImageFolder(16, 2, 32, folder_path, false);
auto op2 = Repeat(2);
op1->SetTotalRepeats(2);
op1->SetNumRepeatsPerEpoch(2);
auto tree = Build({op1, op2});
tree->Prepare();
int32_t res[] = {0, 1, 2, 3};
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
EXPECT_TRUE(res[(i % 44) / 11] == label);
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
}
EXPECT_TRUE(i == 88);
}
}

TEST_F(MindDataTestImageFolderSampler, TestRandomImageFolder) {
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto tree = Build({ImageFolder(16, 2, 32, folder_path, true, nullptr)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
}
EXPECT_TRUE(i == 44);
}
}

TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) {
int32_t original_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(0);
int64_t num_samples = 12;
std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(true, num_samples, true);
int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1}; // ground truth label
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
EXPECT_TRUE(res[i] == label);
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
}
EXPECT_TRUE(i == 12);
}
GlobalContext::config_manager()->set_seed(original_seed);
}

TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeatBatch) {
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto op1 = ImageFolder(16, 2, 32, folder_path, false);
auto op2 = Repeat(2);
auto op3 = Batch(11);
op1->SetTotalRepeats(2);
op1->SetNumRepeatsPerEpoch(2);
auto tree = Build({op1, op2, op3});
tree->Prepare();
int32_t res[4][11] = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
while (tensor_map.size() != 0) {
std::shared_ptr<Tensor> label;
Create1DTensor(&label, 11, reinterpret_cast<unsigned char *>(res[i % 4]), DataType::DE_INT32);
EXPECT_TRUE((*label) == (*tensor_map["label"]));
MS_LOG(DEBUG) << "row: " << i << " " << tensor_map["image"]->shape() << " (*label):" << (*label)
<< " *tensor_map[label]: " << *tensor_map["label"] << std::endl;
i++;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
}
EXPECT_TRUE(i == 8);
}
}

// SubsetRandomSamplerRT over explicit indices: 6 indices from label 0's
// id range and 6 from label 1's.  Order is random, so the test counts
// occurrences per label instead of checking sequence.
TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) {
  // id range 0 - 10 is label 0, and id range 11 - 21 is label 1
  std::vector<int64_t> indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11});
  // num_samples = 0: no cap, all 12 indices are drawn (the loop below
  // asserts i == 12).
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  // Expect 6 samples for label 0 and 1
  int res[2] = {6, 6};
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      // Decrement the per-label budget; both counters must hit exactly 0.
      res[label]--;
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(res[0], 0);
    EXPECT_EQ(res[1], 0);
    EXPECT_TRUE(i == 12);
  }
}

// WeightedRandomSamplerRT with replacement: draw 12 samples out of 44 and
// verify exactly 12 rows come back.  Labels are read but not asserted --
// only the sample count matters for this test.
TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) {
  // num samples to draw.
  int64_t num_samples = 12;
  int64_t total_samples = 44;
  int64_t samples_per_tensor = 10;
  // Give every sample its own strictly positive random weight.  The previous
  // code filled the whole vector with a single std::rand() % 100 value, which
  // made all weights identical and could be 0 for every element, leaving the
  // weighted distribution degenerate.
  std::vector<double> weights(total_samples);
  for (auto &w : weights) {
    w = std::rand() % 100 + 1;
  }

  // create sampler with replacement = true
  std::shared_ptr<SamplerRT> sampler =
    std::make_shared<WeightedRandomSamplerRT>(weights, num_samples, true, samples_per_tensor);

  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    // With replacement the sampler must return exactly num_samples rows.
    EXPECT_TRUE(i == 12);
  }
}

// Verifies the user-supplied class_index map: only the listed folders are
// loaded, their labels come from the map (not from alphabetical auto-indexing),
// and map entries that match no folder are silently ignored.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  std::map<std::string, int32_t> map;
  map["class3"] = 333;
  map["class1"] = 111;
  map["wrong folder name"] = 1234;  // this is skipped
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, nullptr, map)});
  // Sequential order: first the 11 class1 rows (111), then the 11 class3
  // rows (333) -- indexed below as res[i / 11].
  int64_t res[2] = {111, 333};
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_TRUE(label == res[i / 11]);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    // Two mapped classes * 11 images each = 22 rows.
    EXPECT_TRUE(i == 22);
  }
}

// DistributedSamplerRT shard 10 of 11 over 44 images, repeated 4 times.
// Each epoch this shard sees 4 rows (44 / 11), so 4 repeats give 16 rows;
// the label pattern i % 4 below encodes the expected per-epoch sequence.
TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) {
  // num_samples = 0: no cap on samples per shard.
  int64_t num_samples = 0;
  // (num_devices = 11, device_id = 10, shuffle = false)
  std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(11, 10, false, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto op1 = ImageFolder(16, 2, 32, folder_path, false, std::move(sampler));
  auto op2 = Repeat(4);
  // Leaf op must mirror the Repeat op's counts for correct EOE handling.
  op1->SetTotalRepeats(4);
  op1->SetNumRepeatsPerEpoch(4);
  auto tree = Build({op1, op2});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_EQ(i % 4, label);
      MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    // 4 rows per epoch * 4 repeats.
    EXPECT_TRUE(i == 16);
  }
}

// PKSamplerRT: P classes * K samples each.  With K = 3 and no shuffle the
// expected output is 3 consecutive samples from each of the 4 classes,
// in class order -- the res[] ground-truth array below.
TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) {
  // num_samples = 0: no cap; draw K samples from every class.
  int64_t num_samples = 0;
  // (num_val = 3 per class, shuffle = false, samples_per_tensor = 4)
  std::shared_ptr<SamplerRT> sampler = std::make_shared<PKSamplerRT>(3, false, num_samples, 4);
  int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};  // ground truth label
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_TRUE(res[i] == label);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    // 4 classes * 3 samples per class.
    EXPECT_TRUE(i == 12);
  }
}

// ImageFolder with decode = true and a class_index map, capped to 20 rows by
// a sequential sampler.  Checks both the mapped labels and that decoded
// images have the expected HWC shape.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderDecode) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  std::map<std::string, int32_t> map;
  map["class3"] = 333;
  map["class1"] = 111;
  map["wrong folder name"] = 1234;  // this is skipped
  int64_t num_samples = 20;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  // Last argument: decode = true.
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(seq_sampler), map, true)});
  // Sequential order: class1 rows (111) first, then class3 rows (333).
  int64_t res[2] = {111, 333};
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_TRUE(label == res[i / 11]);
      EXPECT_TRUE(tensor_map["image"]->shape() ==
                  TensorShape({2268, 4032, 3}));  // verify shapes are correct after decode
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    // Capped by the sampler's num_samples, not by the dataset size.
    EXPECT_TRUE(i == 20);
  }
}

// Sharding shard 0 of 4 with an explicit cap of 5 samples; checks the exact
// label sequence this shard should see.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) {
  int64_t num_samples = 5;
  // (num_devices = 4, device_id = 0, shuffle = false)
  std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 0, false, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {})});
  tree->Prepare();
  Status rc = tree->Launch();
  // Ground-truth labels for the 5 rows shard 0 receives.
  int32_t labels[5] = {0, 0, 0, 1, 1};
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_EQ(labels[i], label);
      MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_TRUE(i == 5);
  }
}

// Sharding shard 3 of 4: with 44 images evenly split, this shard gets 11 rows
// even though num_samples asks for 12.  Checks the exact label sequence.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) {
  int64_t num_samples = 12;
  // (num_devices = 4, device_id = 3, shuffle = false)
  std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 3, false, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode
  auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {})});
  tree->Prepare();
  Status rc = tree->Launch();
  // Ground-truth labels for the 11 rows shard 3 receives.  int32_t (not the
  // previous uint32_t) so EXPECT_EQ compares against the int32_t label without
  // a signed/unsigned mismatch, matching TestImageFolderSharding1.
  int32_t labels[11] = {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_EQ(labels[i], label);
      MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_TRUE(i == 11);
  }
}

+ 16
- 3
tests/ut/cpp/dataset/map_op_test.cc View File

@@ -25,6 +25,7 @@
#include "minddata/dataset/engine/jagged_connector.h"
#include "minddata/dataset/kernels/image/decode_op.h"
#include "minddata/dataset/kernels/image/resize_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/kernels/tensor_op.h"
#include "utils/log_adapter.h"

@@ -130,9 +131,21 @@ class MindDataTestMapOp : public UT::DatasetOpTesting {

std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path,
bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr,
std::map<std::string, int32_t> map = {}, bool decode = false);

// std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
std::map<std::string, int32_t> map = {}, bool decode = false) {
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
TensorShape scalar = TensorShape::CreateScalar();
(void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
(void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar));
std::set<std::string> ext = {".jpg", ".JPEG"};
if (sampler == nullptr) {
int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data
int64_t start_index = 0;
sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
}
std::shared_ptr<ImageFolderOp> so =
std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler);
return so;
}

// TestAsMap scenario:
// TFReaderOp reads a dataset that have column ordering |image|label|A|B|.


+ 0
- 102
tests/ut/cpp/dataset/project_op_test.cc View File

@@ -1,102 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <vector>

#include "common/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

class MindDataTestProjectOp : public UT::DatasetOpTesting {};

TEST_F(MindDataTestProjectOp, TestProjectProject) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
Status rc;
std::string dataset_path;
dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";

std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
auto op_connector_size = config_manager->op_connector_size();
auto num_workers = 1; // one file, one worker
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
std::vector<std::string> columns_to_load = {};
std::vector<std::string> files = {dataset_path};
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>(
num_workers, 16, 0, files, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false);
rc = my_tfreader_op->Init();
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);
ASSERT_TRUE(rc.IsOk());

// ProjectOp
std::vector<std::string> columns_to_project = {"col_sint16", "col_float", "col_2d"};
std::shared_ptr<ProjectOp> my_project_op = std::make_shared<ProjectOp>(columns_to_project);
rc = my_tree->AssociateNode(my_project_op);
ASSERT_TRUE(rc.IsOk());

// Set children/root layout.
rc = my_project_op->AddChild(my_tfreader_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssignRoot(my_project_op);
ASSERT_TRUE(rc.IsOk());

MS_LOG(INFO) << "Launching tree and begin iteration.";
rc = my_tree->Prepare();

ASSERT_TRUE(rc.IsOk());

rc = my_tree->Launch();
ASSERT_TRUE(rc.IsOk());

// Start the loop of reading tensors from our pipeline
DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());

int row_count = 0;
while (!tensor_list.empty()) {
MS_LOG(INFO) << "Row display for row #: " << row_count << ".";

ASSERT_EQ(tensor_list.size(), columns_to_project.size());

// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
}

rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
row_count++;
}

ASSERT_EQ(row_count, 12);
}

+ 0
- 111
tests/ut/cpp/dataset/rename_op_test.cc View File

@@ -1,111 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/datasetops/rename_op.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "common/common.h"
#include "utils/ms_utils.h"

#include "gtest/gtest.h"
#include "minddata/dataset/core/global_context.h"
#include "utils/log_adapter.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

class MindDataTestRenameOp : public UT::DatasetOpTesting {};

TEST_F(MindDataTestRenameOp, TestRenameOpDefault) {
// Tree:
//
//
// OpId(2) RenameOp
// |
// OpId(0) TFReaderOp
// Start with an empty execution tree
Status rc;
MS_LOG(INFO) << "UT test TestRenameBasic.";
auto my_tree = std::make_shared<ExecutionTree>();
// Creating TFReaderOp

std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
int32_t op_connector_size = config_manager->op_connector_size();
int32_t num_workers = 1;
int32_t worker_connector_size = 16;
std::vector<std::string> columns_to_load = {};
std::vector<std::string> files = {dataset_path};
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
std::shared_ptr<TFReaderOp> my_tfreader_op =
std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
columns_to_load, false, 1, 0, false);
rc = my_tfreader_op->Init();
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);
EXPECT_TRUE(rc.IsOk());

// Creating DatasetOp
std::vector<std::string> in_col_names = {"label"};
std::vector<std::string> out_col_names = {"label1"};

std::shared_ptr<RenameOp> rename_op = std::make_shared<RenameOp>(in_col_names, out_col_names);

rc = my_tree->AssociateNode(rename_op);
EXPECT_TRUE(rc.IsOk());
rc = rename_op->AddChild(std::move(my_tfreader_op));
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssignRoot(rename_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->Prepare();
EXPECT_TRUE(rc.IsOk());

// Launch the tree execution to kick off threads and start running the pipeline
MS_LOG(INFO) << "Launching my tree.";
rc = my_tree->Launch();
EXPECT_TRUE(rc.IsOk());

// Simulate a parse of data from our pipeline.
std::shared_ptr<DatasetOp> root_node = my_tree->root();

DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
EXPECT_TRUE(rc.IsOk());

int row_count = 0;
while (!tensor_list.empty()) {
MS_LOG(INFO) << "Row display for row #: " << row_count << ".";

// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
}
rc = di.FetchNextTensorRow(&tensor_list);
EXPECT_TRUE(rc.IsOk());
row_count++;
}
ASSERT_EQ(row_count, 3); // Should be 3 rows fetched
}

+ 0
- 120
tests/ut/cpp/dataset/sentence_piece_vocab_op_test.cc View File

@@ -56,55 +56,6 @@ std::shared_ptr<TextFileOp> TextFile(std::vector<std::string> text_files_list, i
return text_file_op;
}

TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) {
MS_LOG(INFO) << "Doing MindDataTestSentencePieceVocabOp TestSentencePieceFromDatasetFuntions.";
Status rc;
std::string dataset_path;
dataset_path = datasets_root_path_ + "/test_sentencepiece/botchan.txt";
auto tree = std::make_shared<ExecutionTree>();

std::shared_ptr<TextFileOp> file_op = TextFile({dataset_path}, 1, 2);

rc = tree->AssociateNode(file_op);
ASSERT_TRUE(rc.IsOk());
std::vector<std::string> cols;
std::unordered_map<std::string, std::string> m_params;

std::shared_ptr<SentencePieceVocab> spm = std::make_unique<SentencePieceVocab>();
// Sample construstructor for reference
// BuildSentencePieceVocabOp(std::shared_ptr<SentencePieceVocab> vocab, std::vector<std::string> col_names,
// int32_t vocab_size, float character_coverage, SentencePieceModel model_type,
// const std::unordered_map<std::string, std::string> &params, int32_t op_conn_size);
std::shared_ptr<BuildSentencePieceVocabOp> spv_op = std::make_shared<BuildSentencePieceVocabOp>(
std::move(spm), cols, 5000, 0.9995, SentencePieceModel::kUnigram, m_params, 2);
rc = tree->AssociateNode(spv_op);
ASSERT_TRUE(rc.IsOk());

rc = spv_op->AddChild(file_op);
ASSERT_TRUE(rc.IsOk());

file_op->SetTotalRepeats(1);
file_op->SetNumRepeatsPerEpoch(1);
rc = tree->AssignRoot(spv_op);
ASSERT_TRUE(rc.IsOk());
rc = tree->Prepare();
ASSERT_TRUE(rc.IsOk());

rc = tree->Launch();
ASSERT_TRUE(rc.IsOk());

// Start the loop of reading tensors from our pipeline
DatasetIterator di(tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());

while (!tensor_list.empty()) {
rc = di.FetchNextTensorRow(&tensor_list);
}
ASSERT_TRUE(rc.IsOk());
}

TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) {
MS_LOG(INFO) << "Doing MindDataTestSentencePieceVocabOp TestSentencePieceFromFileFuntions.";

@@ -117,74 +68,3 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) {
Status rc = SentencePieceVocab::BuildFromFile(path_list, 5000, 0.9995, SentencePieceModel::kUnigram, param_map, &spm);
ASSERT_TRUE(rc.IsOk());
}

TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) {
MS_LOG(INFO) << "Doing MindDataTestSentencePieceVocabOp TestSentencePieceTokenizerFuntions.";

std::string dataset_path;
dataset_path = datasets_root_path_ + "/test_sentencepiece/botchan.txt";
auto tree = std::make_shared<ExecutionTree>();
std::shared_ptr<TextFileOp> file_op = TextFile({dataset_path}, 1, 2);

Status rc = tree->AssociateNode(file_op);
ASSERT_TRUE(rc.IsOk());

std::shared_ptr<SentencePieceVocab> spm = std::make_unique<SentencePieceVocab>();
std::vector<std::string> cols;
std::unordered_map<std::string, std::string> m_params;

std::shared_ptr<BuildSentencePieceVocabOp> spv_op = std::make_shared<BuildSentencePieceVocabOp>(
spm, cols, 5000, 0.9995, SentencePieceModel::kUnigram, m_params, 2);
rc = tree->AssociateNode(spv_op);
ASSERT_TRUE(rc.IsOk());

rc = spv_op->AddChild(file_op);
ASSERT_TRUE(rc.IsOk());

file_op->SetTotalRepeats(1);
file_op->SetNumRepeatsPerEpoch(1);
rc = tree->AssignRoot(spv_op);
ASSERT_TRUE(rc.IsOk());
rc = tree->Prepare();
ASSERT_TRUE(rc.IsOk());

rc = tree->Launch();
ASSERT_TRUE(rc.IsOk());

// Start the loop of reading tensors from our pipeline
DatasetIterator di(tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());

while (!tensor_list.empty()) {
rc = di.FetchNextTensorRow(&tensor_list);
}
std::shared_ptr<Tensor> output_tensor;
std::unique_ptr<SentencePieceTokenizerOp> op(
new SentencePieceTokenizerOp(spm, SPieceTokenizerLoadType::kModel, SPieceTokenizerOutType::kString));
std::shared_ptr<Tensor> input_tensor;
Tensor::CreateScalar<std::string>("I saw a girl with a telescope.", &input_tensor);
Status s = op->Compute(input_tensor, &output_tensor);

std::vector<std::string> expect;
expect.push_back("▁I");
expect.push_back("▁sa");
expect.push_back("w");
expect.push_back("▁a");
expect.push_back("▁girl");
expect.push_back("▁with");
expect.push_back("▁a");
expect.push_back("▁te");
expect.push_back("les");
expect.push_back("co");
expect.push_back("pe");
expect.push_back(".");
ASSERT_TRUE(output_tensor->Size() == expect.size());
for (int i = 0; i < output_tensor->Size(); i++) {
std::string_view str;
output_tensor->GetItemAt(&str, {i});
std::string sentence{str};
ASSERT_TRUE(sentence == expect[i]);
}
}

+ 0
- 95
tests/ut/cpp/dataset/skip_op_test.cc View File

@@ -1,95 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/util/circular_pool.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "common/common.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

class MindDataTestSkipOp : public UT::DatasetOpTesting {};

TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
Status rc;
std::string dataset_path;
dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";

std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
int32_t op_connector_size = config_manager->op_connector_size();
int32_t num_workers = config_manager->num_parallel_workers();
int32_t worker_connector_size = 16;
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
std::vector<std::string> columns_to_load = {};
std::vector<std::string> files = {dataset_path};
std::shared_ptr<TFReaderOp> my_tfreader_op =
std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
columns_to_load, false, 1, 0, false);
rc = my_tfreader_op->Init();
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);
ASSERT_TRUE(rc.IsOk());

// SkipOp
std::shared_ptr<SkipOp> skip_op = std::make_shared<SkipOp>(5);
rc = my_tree->AssociateNode(skip_op);
ASSERT_TRUE(rc.IsOk());

// Set children/root layout.
rc = skip_op->AddChild(my_tfreader_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssignRoot(skip_op);
ASSERT_TRUE(rc.IsOk());

MS_LOG(INFO) << "Launching tree and begin iteration.";
rc = my_tree->Prepare();

ASSERT_TRUE(rc.IsOk());

rc = my_tree->Launch();
ASSERT_TRUE(rc.IsOk());

// Start the loop of reading tensors from our pipeline
DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());

int row_count = 0;
while (!tensor_list.empty()) {
MS_LOG(INFO) << "Row display for row #: " << row_count << ".";

// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
}

rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
row_count++;
}

ASSERT_EQ(row_count, 7);
}

+ 0
- 101
tests/ut/cpp/dataset/take_op_test.cc View File

@@ -1,101 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <vector>

#include "common/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

class MindDataTestTakeOp : public UT::DatasetOpTesting {};

TEST_F(MindDataTestTakeOp, TestTakeProject) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
Status rc;
std::string dataset_path;
dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";

// TFReaderOp
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
auto num_workers = 1;
auto op_connector_size = config_manager->op_connector_size();
std::vector<std::string> columns_to_load = {};
std::vector<std::string> files = {dataset_path};
// worker connector size = 16
std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>(
num_workers, 16, 0, files, std::make_unique<DataSchema>(), op_connector_size, columns_to_load, false, 1, 0, false);
rc = my_tfreader_op->Init();
ASSERT_OK(rc);
// TakeOp
std::shared_ptr<TakeOp> my_take_op = std::make_shared<TakeOp>(5);

rc = my_tree->AssociateNode(my_tfreader_op);
ASSERT_OK(rc);
rc = my_tree->AssociateNode(my_take_op);
ASSERT_OK(rc);

// Set children/root layout.
rc = my_take_op->AddChild(my_tfreader_op);
ASSERT_OK(rc);
rc = my_tree->AssignRoot(my_take_op);
ASSERT_OK(rc);

MS_LOG(DEBUG) << "Launching tree and begin iteration.";
rc = my_tree->Prepare();

ASSERT_OK(rc);

rc = my_tree->Launch();
ASSERT_OK(rc);

// Start the loop of reading tensors from our pipeline
DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_OK(rc);

int row_count = 0;
while (!tensor_list.empty()) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";

// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
}

rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_OK(rc);
row_count++;
}

ASSERT_EQ(row_count, 5);
}

+ 0
- 128
tests/ut/cpp/dataset/text_file_op_test.cc View File

@@ -1,128 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <vector>

#include "minddata/dataset/core/client.h"
#include "common/common.h"
#include "utils/ms_utils.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/datasetops/source/text_file_op.h"
#include "minddata/dataset/util/status.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

class MindDataTestTextFileOp : public UT::DatasetOpTesting {};

TEST_F(MindDataTestTextFileOp, TestTextFileBasic) {
  // Builds a minimal execution tree with a single TextFileOp reading one text
  // file, drives the iterator to exhaustion, and verifies 3 rows are produced.
  // Start with an empty execution tree
  auto tree = std::make_shared<ExecutionTree>();
  Status rc;
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t num_workers = 1;  // Only one file
  int32_t op_connector_size = 2;
  int32_t worker_connector_size = config_manager->worker_connector_size();
  int64_t total_rows = 0;  // 0 means read all rows
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  rc = schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
  ASSERT_OK(rc);
  std::vector<std::string> files = {dataset_path};
  bool shuffle_files = false;
  int32_t num_devices = 1;
  int32_t device_id = 0;

  std::shared_ptr<TextFileOp> op =
    std::make_shared<TextFileOp>(num_workers, total_rows, worker_connector_size, std::move(schema), files,
                                 op_connector_size, shuffle_files, num_devices, device_id);
  rc = op->Init();
  ASSERT_OK(rc);

  rc = tree->AssociateNode(op);
  ASSERT_OK(rc);

  rc = tree->AssignRoot(op);
  ASSERT_OK(rc);

  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = tree->Prepare();
  ASSERT_OK(rc);

  rc = tree->Launch();
  ASSERT_OK(rc);

  // Start the loop of reading tensors from our pipeline; an empty row
  // signals end of data.
  DatasetIterator di(tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_OK(rc);

  int row_count = 0;
  while (!tensor_list.empty()) {
    // Display the tensor by calling the printer on it.
    // size_t index avoids the signed/unsigned comparison with size().
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }

    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_OK(rc);
    row_count++;
  }

  // 1.txt contains exactly 3 lines.
  ASSERT_EQ(row_count, 3);
}

TEST_F(MindDataTestTextFileOp, TestTotalRows) {
  // Verifies TextFileOp::CountAllFileRows for each file alone and for both
  // files together. The Status returned by CountAllFileRows was previously
  // discarded; it is now asserted OK so a read failure cannot masquerade as
  // a row-count mismatch.
  std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt";
  std::string tf_file2 = datasets_root_path_ + "/testTextFileDataset/2.txt";
  std::vector<std::string> files;
  int64_t total_rows = 0;

  // 1.txt alone has 3 rows.
  files.push_back(tf_file1);
  ASSERT_OK(TextFileOp::CountAllFileRows(files, &total_rows));
  ASSERT_EQ(total_rows, 3);
  files.clear();

  // 2.txt alone has 2 rows.
  files.push_back(tf_file2);
  ASSERT_OK(TextFileOp::CountAllFileRows(files, &total_rows));
  ASSERT_EQ(total_rows, 2);
  files.clear();

  // Both files together: 3 + 2 = 5 rows.
  files.push_back(tf_file1);
  files.push_back(tf_file2);
  ASSERT_OK(TextFileOp::CountAllFileRows(files, &total_rows));
  ASSERT_EQ(total_rows, 5);
  files.clear();
}

TEST_F(MindDataTestTextFileOp, TestTotalRowsFileNotExist) {
  // CountAllFileRows on a path that does not exist: the output row count must
  // remain 0.
  // NOTE(review): the returned Status is deliberately left unchecked here —
  // presumably it is non-OK for a missing file; confirm before asserting on it.
  std::string tf_file1 = datasets_root_path_ + "/does/not/exist/0.txt";
  std::vector<std::string> files;
  files.push_back(tf_file1);
  int64_t total_rows = 0;
  TextFileOp::CountAllFileRows(files, &total_rows);
  ASSERT_EQ(total_rows, 0);
}

+ 0
- 115
tests/ut/cpp/dataset/voc_op_test.cc View File

@@ -1,115 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <string>

#include "common/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/datasetops/source/voc_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/include/dataset/datasets.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;

// Helper defined elsewhere: builds an ExecutionTree from a list of dataset ops.
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);

// Test fixture for VOC dataset tests; datasets_root_path_ comes from the base class.
class MindDataTestVOCOp : public UT::DatasetOpTesting {
 protected:
};

TEST_F(MindDataTestVOCOp, TestVOCDetection) {
  // Iterates testVOC2012 in Detection mode with no class filter and a
  // sequential sampler over all samples; expects 9 rows, each carrying an
  // "image" and a "label" tensor.
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testVOC2012";
  std::shared_ptr<Dataset> ds =
    VOC(dataset_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 0));
  EXPECT_NE(ds, nullptr);
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  int row_count = 0;
  // Use empty() for consistency with the other VOC tests in this file
  // (previously `row.size() != 0`).
  while (!row.empty()) {
    auto image = row["image"];
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    MS_LOG(INFO) << "Tensor label shape: " << label.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
    row_count++;
  }
  ASSERT_EQ(row_count, 9);
  iter->Stop();
}

TEST_F(MindDataTestVOCOp, TestVOCSegmentation) {
  // Iterates testVOC2012 in Segmentation mode with a sequential sampler over
  // all samples; expects 10 rows, each carrying "image" and "target" tensors.
  std::string folder_path = datasets_root_path_ + "/testVOC2012";
  std::shared_ptr<Dataset> dataset =
    VOC(folder_path, "Segmentation", "train", {}, false, std::make_shared<SequentialSampler>(0, 0));
  EXPECT_NE(dataset, nullptr);

  std::shared_ptr<Iterator> iterator = dataset->CreateIterator();
  EXPECT_NE(iterator, nullptr);

  std::unordered_map<std::string, mindspore::MSTensor> current_row;
  ASSERT_OK(iterator->GetNextRow(&current_row));

  int count = 0;
  for (; !current_row.empty(); ++count) {
    auto image = current_row["image"];
    auto target = current_row["target"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    MS_LOG(INFO) << "Tensor target shape: " << target.Shape();
    ASSERT_OK(iterator->GetNextRow(&current_row));
  }
  ASSERT_EQ(count, 10);
  iterator->Stop();
}

TEST_F(MindDataTestVOCOp, TestVOCClassIndex) {
  // Iterates testVOC2012 in Detection mode restricted by an explicit
  // class_index mapping; expects 6 rows under this filter.
  std::string folder_path = datasets_root_path_ + "/testVOC2012";
  std::map<std::string, int32_t> class_index = {{"car", 0}, {"cat", 1}, {"train", 5}};
  std::shared_ptr<Dataset> dataset =
    VOC(folder_path, "Detection", "train", class_index, false, std::make_shared<SequentialSampler>(0, 0));
  EXPECT_NE(dataset, nullptr);

  std::shared_ptr<Iterator> iterator = dataset->CreateIterator();
  EXPECT_NE(iterator, nullptr);

  std::unordered_map<std::string, mindspore::MSTensor> current_row;
  ASSERT_OK(iterator->GetNextRow(&current_row));

  int count = 0;
  for (; !current_row.empty(); ++count) {
    auto image = current_row["image"];
    auto label = current_row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    MS_LOG(INFO) << "Tensor label shape: " << label.Shape();
    ASSERT_OK(iterator->GetNextRow(&current_row));
  }
  ASSERT_EQ(count, 6);
  iterator->Stop();
}

+ 0
- 214
tests/ut/cpp/dataset/zip_op_test.cc View File

@@ -1,214 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <chrono>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/engine/datasetops/zip_op.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "common/common.h"
#include "utils/ms_utils.h"

#include "gtest/gtest.h"
#include "utils/log_adapter.h"

namespace common = mindspore::common;

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;

// Test fixture for ZipOp unit tests; inherits shared dataset-op test
// utilities (e.g. datasets_root_path_) from UT::DatasetOpTesting.
class MindDataTestZipOp : public UT::DatasetOpTesting {};

TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
  /* Tree:
   *
   *
   *         OpId(2) ZipOp
   *            /        \
   *  OpId(0) TFReaderOp   OpId(1) TFReaderOp
   * Start with an empty execution tree
   */
  Status rc;
  MS_LOG(INFO) << "UT test TestZipBasic.";
  auto my_tree = std::make_shared<ExecutionTree>();
  // Creating TFReaderOp

  std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data";
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  std::vector<std::string> columns_to_load = {};
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  std::vector<std::string> files1 = {dataset_path};
  auto op_connector_size = config_manager->op_connector_size();
  std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>(
    1, 16, 0, files1, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op);
  EXPECT_TRUE(rc.IsOk());
  std::vector<std::string> files2 = {dataset_path2};
  std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>();
  // Pass schema2 here — it was previously constructed but left unused while a
  // second fresh DataSchema was passed in its place.
  std::shared_ptr<TFReaderOp> my_tfreader_op2 = std::make_shared<TFReaderOp>(
    1, 1, 0, files2, std::move(schema2), op_connector_size, columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op2->Init();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op2);
  EXPECT_TRUE(rc.IsOk());

  // Creating DatasetOp
  std::shared_ptr<ZipOp> zip_op = std::make_shared<ZipOp>();

  rc = my_tree->AssociateNode(zip_op);
  EXPECT_TRUE(rc.IsOk());
  rc = zip_op->AddChild(std::move(my_tfreader_op));
  EXPECT_TRUE(rc.IsOk());
  rc = zip_op->AddChild(std::move(my_tfreader_op2));
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(zip_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->Prepare();
  EXPECT_TRUE(rc.IsOk());

  // Launch the tree execution to kick off threads and start running the pipeline
  MS_LOG(INFO) << "Launching my tree.";
  rc = my_tree->Launch();
  EXPECT_TRUE(rc.IsOk());

  // Simulate a parse of data from our pipeline. (The unused local holding
  // my_tree->root() has been removed.)
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  EXPECT_TRUE(rc.IsOk());

  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count << ".";

    // Display the tensor by calling the printer on it.
    // size_t index avoids the signed/unsigned comparison with size().
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    EXPECT_TRUE(rc.IsOk());
    row_count++;
  }
  ASSERT_EQ(row_count, 3);  // Should be 3 rows fetched
}

TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
  /* Tree:
   *                  OpId(3) Repeat(3)
   *
   *            OpId(2) ZipOp
   *            /             \
   *   OpId(0) TFReaderOp    OpId(1) TFReaderOp
   *
   * Start with an empty execution tree
   */
  Status rc;
  MS_LOG(INFO) << "UT test TestZipRepeat.";
  auto my_tree = std::make_shared<ExecutionTree>();

  uint32_t num_repeats = 3;
  std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data";
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  auto op_connector_size = config_manager->op_connector_size();
  std::vector<std::string> columns_to_load = {};
  std::vector<std::string> files1 = {dataset_path};
  std::unique_ptr<DataSchema> schema1 = std::make_unique<DataSchema>();
  std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>(
    1, 16, 0, files1, std::move(schema1), op_connector_size, columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  EXPECT_TRUE(rc.IsOk());

  // Associate the first leaf with the tree exactly once (the original code
  // called AssociateNode twice for this op and discarded the first Status).
  rc = my_tree->AssociateNode(my_tfreader_op);
  EXPECT_TRUE(rc.IsOk());
  std::vector<std::string> files2 = {dataset_path2};
  std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>();
  std::shared_ptr<TFReaderOp> my_tfreader_op2 = std::make_shared<TFReaderOp>(
    1, 1, 0, files2, std::move(schema2), op_connector_size, columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op2->Init();
  EXPECT_TRUE(rc.IsOk());

  rc = my_tree->AssociateNode(my_tfreader_op2);
  EXPECT_TRUE(rc.IsOk());
  // Creating DatasetOp
  std::shared_ptr<ZipOp> zip_op = std::make_shared<ZipOp>();
  rc = my_tree->AssociateNode(zip_op);
  EXPECT_TRUE(rc.IsOk());
  my_tfreader_op->SetTotalRepeats(num_repeats);
  my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = zip_op->AddChild(std::move(my_tfreader_op));
  EXPECT_TRUE(rc.IsOk());
  my_tfreader_op2->SetTotalRepeats(num_repeats);
  my_tfreader_op2->SetNumRepeatsPerEpoch(num_repeats);
  rc = zip_op->AddChild(std::move(my_tfreader_op2));
  EXPECT_TRUE(rc.IsOk());

  std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree->AssociateNode(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  zip_op->SetTotalRepeats(num_repeats);
  zip_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = my_repeat_op->AddChild(zip_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->Prepare();
  EXPECT_TRUE(rc.IsOk());

  // Launch the tree execution to kick off threads and start running the pipeline
  MS_LOG(INFO) << "Launching my tree.";
  rc = my_tree->Launch();
  EXPECT_TRUE(rc.IsOk());

  // Simulate a parse of data from our pipeline. (The unused local holding
  // my_tree->root() has been removed.)
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  EXPECT_TRUE(rc.IsOk());

  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count << ".";

    // Display the tensor by calling the printer on it.
    // size_t index avoids the signed/unsigned comparison with size().
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    EXPECT_TRUE(rc.IsOk());
    row_count++;
  }
  ASSERT_EQ(row_count, 9);  // 3 rows x 3 repeats
}

Loading…
Cancel
Save