|
|
@@ -33,7 +33,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetBasic) { |
|
|
// Create a CSVDataset, with single CSV file |
|
|
// Create a CSVDataset, with single CSV file |
|
|
std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, -1, ShuffleMode::kFalse); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, 0, ShuffleMode::kFalse); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -85,7 +85,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetMultiFiles) { |
|
|
std::string file1 = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file1 = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file2 = datasets_root_path_ + "/testCSV/append.csv"; |
|
|
std::string file2 = datasets_root_path_ + "/testCSV/append.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({file1, file2}, ',', {}, column_names, -1, ShuffleMode::kGlobal); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({file1, file2}, ',', {}, column_names, 0, ShuffleMode::kGlobal); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -179,7 +179,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetDistribution) { |
|
|
// Create a CSVDataset, with single CSV file |
|
|
// Create a CSVDataset, with single CSV file |
|
|
std::string file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({file}, ',', {}, column_names, -1, ShuffleMode::kFalse, 2, 0); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({file}, ',', {}, column_names, 0, ShuffleMode::kFalse, 2, 0); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -228,7 +228,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetType) { |
|
|
std::make_shared<CsvRecord<std::string>>(CsvType::STRING, ""), |
|
|
std::make_shared<CsvRecord<std::string>>(CsvType::STRING, ""), |
|
|
}; |
|
|
}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({file}, ',', colum_type, column_names, -1, ShuffleMode::kFalse); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({file}, ',', colum_type, column_names, 0, ShuffleMode::kFalse); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -343,15 +343,15 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetException) { |
|
|
EXPECT_EQ(ds1, nullptr); |
|
|
EXPECT_EQ(ds1, nullptr); |
|
|
|
|
|
|
|
|
// Test invalid num_samples < -1 |
|
|
// Test invalid num_samples < 0 |
|
|
std::shared_ptr<Dataset> ds2 = CSV({file}, ',', {}, column_names, -2); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds2 = CSV({file}, ',', {}, column_names, -1); |
|
|
EXPECT_EQ(ds2, nullptr); |
|
|
EXPECT_EQ(ds2, nullptr); |
|
|
|
|
|
|
|
|
// Test invalid num_shards < 1 |
|
|
// Test invalid num_shards < 1 |
|
|
std::shared_ptr<Dataset> ds3 = CSV({file}, ',', {}, column_names, -1, ShuffleMode::kFalse, 0); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds3 = CSV({file}, ',', {}, column_names, 0, ShuffleMode::kFalse, 0); |
|
|
EXPECT_EQ(ds3, nullptr); |
|
|
EXPECT_EQ(ds3, nullptr); |
|
|
|
|
|
|
|
|
// Test invalid shard_id >= num_shards |
|
|
// Test invalid shard_id >= num_shards |
|
|
std::shared_ptr<Dataset> ds4 = CSV({file}, ',', {}, column_names, -1, ShuffleMode::kFalse, 2, 2); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds4 = CSV({file}, ',', {}, column_names, 0, ShuffleMode::kFalse, 2, 2); |
|
|
EXPECT_EQ(ds4, nullptr); |
|
|
EXPECT_EQ(ds4, nullptr); |
|
|
|
|
|
|
|
|
// Test invalid field_delim |
|
|
// Test invalid field_delim |
|
|
@@ -373,7 +373,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetShuffleFilesA) { |
|
|
std::string file1 = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file1 = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file2 = datasets_root_path_ + "/testCSV/append.csv"; |
|
|
std::string file2 = datasets_root_path_ + "/testCSV/append.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({file1, file2}, ',', {}, column_names, -1, ShuffleMode::kFiles); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({file1, file2}, ',', {}, column_names, 0, ShuffleMode::kFiles); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -432,7 +432,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetShuffleFilesB) { |
|
|
std::string file1 = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file1 = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string file2 = datasets_root_path_ + "/testCSV/append.csv"; |
|
|
std::string file2 = datasets_root_path_ + "/testCSV/append.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({file2, file1}, ',', {}, column_names, -1, ShuffleMode::kFiles); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({file2, file1}, ',', {}, column_names, 0, ShuffleMode::kFiles); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -492,7 +492,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetShuffleGlobal) { |
|
|
// Create a CSVFile Dataset, with single CSV file |
|
|
// Create a CSVFile Dataset, with single CSV file |
|
|
std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, -1, ShuffleMode::kGlobal); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, 0, ShuffleMode::kGlobal); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
EXPECT_NE(ds, nullptr); |
|
|
|
|
|
|
|
|
// Create an iterator over the result of the above dataset |
|
|
// Create an iterator over the result of the above dataset |
|
|
@@ -540,7 +540,7 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetDuplicateColumnName) { |
|
|
// Create a CSVDataset, with single CSV file |
|
|
// Create a CSVDataset, with single CSV file |
|
|
std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; |
|
|
std::vector<std::string> column_names = {"col1", "col1", "col3", "col4"}; |
|
|
std::vector<std::string> column_names = {"col1", "col1", "col3", "col4"}; |
|
|
std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, -1, ShuffleMode::kFalse); |
|
|
|
|
|
|
|
|
std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, 0, ShuffleMode::kFalse); |
|
|
// Expect failure: duplicate column names |
|
|
// Expect failure: duplicate column names |
|
|
EXPECT_EQ(ds, nullptr); |
|
|
EXPECT_EQ(ds, nullptr); |
|
|
} |
|
|
} |