diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h index 5182c65d1d..6558b91029 100644 --- a/mindspore/ccsrc/minddata/dataset/include/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -159,9 +159,9 @@ class Compose final : public TensorTransform { class Concatenate final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] axis Concatenate the tensors along given axis (Default=0). - /// \param[in] prepend MSTensor to be prepended to the already concatenated tensors (Default={}). - /// \param[in] append MSTensor to be appended to the already concatenated tensors (Default={}). + /// \param[in] axis Concatenate the tensors along given axis, only support 0 or -1 so far (default=0). + /// \param[in] prepend MSTensor to be prepended to the already concatenated tensors (default={}). + /// \param[in] append MSTensor to be appended to the already concatenated tensors (default={}). explicit Concatenate(int8_t axis = 0, MSTensor prepend = {}, MSTensor append = {}); /// \brief Destructor @@ -227,7 +227,8 @@ class Mask final : public TensorTransform { /// \param[in] op One of the relational operators EQ, NE LT, GT, LE or GE. /// \param[in] constant Constant to be compared to. /// Can only be MSTensor of str, int, float, bool. - /// \param[in] de_type Type of the generated mask (Default to be mindspore::DataType::kNumberTypeBool). + /// \param[in] ms_type Type of the generated mask. Can only be numeric or boolean datatype. + /// (default=mindspore::DataType::kNumberTypeBool) explicit Mask(RelationalOp op, MSTensor constant, mindspore::DataType ms_type = mindspore::DataType(mindspore::DataType::kNumberTypeBool)); @@ -273,7 +274,7 @@ class PadEnd final : public TensorTransform { /// \param[in] pad_shape List of integers representing the shape needed. /// Dimensions that set to `None` will not be padded (i.e., original dim will be used). 
/// Shorter dimensions will truncate the values. - /// \param[in] pad_value Value used to pad. Default to be {}. + /// \param[in] pad_value Value used to pad (default={}). explicit PadEnd(const std::vector &pad_shape, MSTensor pad_value = {}); /// \brief Destructor diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc index 6ee48ea306..7434be6475 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc @@ -139,7 +139,14 @@ Status FillOperation::to_json(nlohmann::json *out_json) { MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr &constant, DataType dtype) : op_(op), constant_(constant), dtype_(dtype) {} -Status MaskOperation::ValidateParams() { return Status::OK(); } +Status MaskOperation::ValidateParams() { + if (!dtype_.IsBool() && !dtype_.IsFloat() && !dtype_.IsInt()) { + std::string err_msg = "Mask: Only supports bool or numeric datatype for generated mask type."; + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + return Status::OK(); +} std::shared_ptr MaskOperation::Build() { return std::make_shared(op_, constant_, dtype_); } #endif diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 4b221c6014..3bbc92fb50 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -163,7 +163,7 @@ class _SliceOption(cde.SliceOption): super().__init__(slice_option) -class Slice(): +class Slice(TensorOperation): """ Slice operation to extract a tensor out using the given n slices. @@ -226,7 +226,7 @@ DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ, Relational.LE: cde.RelationalOp.LE} -class Mask(): +class Mask(TensorOperation): """ Mask content of the input tensor with the given predicate. 
Any element of the tensor that matches the predicate will be evaluated to True, otherwise False. @@ -264,7 +264,7 @@ class Mask(): return cde.MaskOperation(DE_C_RELATIONAL[self.operator], self.constant, self.dtype) -class PadEnd(): +class PadEnd(TensorOperation): """ Pad input tensor according to pad_shape, need to have same rank. @@ -300,7 +300,7 @@ class PadEnd(): return cde.PadEndOperation(self.pad_shape, self.pad_value) -class Concatenate(): +class Concatenate(TensorOperation): """ Tensor operation that concatenates all columns into a single tensor. diff --git a/tests/ut/cpp/dataset/c_api_samplers_test.cc b/tests/ut/cpp/dataset/c_api_samplers_test.cc index 431321eeab..b8d156ecf5 100644 --- a/tests/ut/cpp/dataset/c_api_samplers_test.cc +++ b/tests/ut/cpp/dataset/c_api_samplers_test.cc @@ -325,3 +325,119 @@ TEST_F(MindDataTestPipeline, TestSamplerAddChild) { EXPECT_EQ(ds->GetDatasetSize(), 5); iter->Stop(); } + +TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1."; + // Test basic setting of subset_sampler with default num_samples + + std::vector indices = {2, 4, 6, 8, 10, 12}; + std::shared_ptr sampl = std::make_shared(indices); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Iterate the dataset and get each row + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 6); + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2."; + // 
Test subset_sampler with num_samples + + std::vector indices = {2, 4, 6, 8, 10, 12}; + std::shared_ptr sampl = std::make_shared(indices, 3); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Iterate the dataset and get each row + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 3); + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3."; + // Test subset_sampler with num_samples larger than the indices size. + + std::vector indices = {2, 4, 6, 8, 10, 12}; + std::shared_ptr sampl = std::make_shared(indices, 8); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Iterate the dataset and get each row + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 6); + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail."; + // Test subset_sampler with index out of bounds. 
+ + std::vector indices = {2, 4, 6, 8, 10, 100}; // Sample ID (100) is out of bound + std::shared_ptr sampl = std::make_shared(indices); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Iterate the dataset and get each row + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + std::unordered_map row; + // Expect failure: index 100 is out of dataset bounds + EXPECT_ERROR(iter->GetNextRow(&row)); + + iter->Stop(); +} diff --git a/tests/ut/cpp/dataset/c_api_transforms_test.cc b/tests/ut/cpp/dataset/c_api_transforms_test.cc index 13958f0280..d314895c04 100644 --- a/tests/ut/cpp/dataset/c_api_transforms_test.cc +++ b/tests/ut/cpp/dataset/c_api_transforms_test.cc @@ -17,6 +17,8 @@ #include "minddata/dataset/include/datasets.h" #include "minddata/dataset/include/transforms.h" #include "minddata/dataset/include/vision.h" +#include "mindspore/ccsrc/minddata/dataset/core/tensor.h" +#include "mindspore/ccsrc/minddata/dataset/core/data_type.h" using namespace mindspore::dataset; using mindspore::dataset::BorderType; @@ -137,8 +139,9 @@ TEST_F(MindDataTestPipeline, TestComposeFail3) { EXPECT_EQ(iter, nullptr); } -TEST_F(MindDataTestPipeline, TestConcatenateSuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess."; +TEST_F(MindDataTestPipeline, TestConcatenateSuccess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess1."; + // Test basic concatenate with prepend and append // Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); @@ -151,13 +154,13 @@ TEST_F(MindDataTestPipeline, TestConcatenateSuccess) { EXPECT_NE(ds, nullptr); // Create Concatenate op - std::vector prepend_vector = {1, 2}; + std::vector prepend_vector = {1, 2}; std::shared_ptr prepend_tensor; 
ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); mindspore::MSTensor prepend_MSTensor = mindspore::MSTensor(std::make_shared(prepend_tensor)); - std::vector append_vector = {3}; + std::vector append_vector = {3}; std::shared_ptr append_tensor; ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor)); mindspore::MSTensor append_MSTensor = @@ -178,10 +181,10 @@ TEST_F(MindDataTestPipeline, TestConcatenateSuccess) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector> expected = { + std::vector> expected = { {1, 2, 31354, 3}, {1, 2, -5655, 3}, {1, 2, -17734, 3}, {1, 2, -17220, 3}}; - // Check concatnate results + // Check concatenate results uint64_t i = 0; while (row.size() != 0) { auto ind = row["col1"]; @@ -201,19 +204,24 @@ TEST_F(MindDataTestPipeline, TestConcatenateSuccess) { GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestDuplicateSuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDuplicateSuccess."; +TEST_F(MindDataTestPipeline, TestConcatenateSuccess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess2."; + // Test concatenate with no input - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create objects for the tensor ops - transforms::Duplicate duplicate = transforms::Duplicate(); + transforms::Concatenate concatenate = transforms::Concatenate(); // Create a Map operation on ds - ds = ds->Map({duplicate}, {"image"}, 
{"image", "image_copy"}); + ds = ds->Map({concatenate}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -225,42 +233,67 @@ TEST_F(MindDataTestPipeline, TestDuplicateSuccess) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); + // The data generated by RandomData + std::vector> expected = {{31354}, {-5655}, {-17734}, {-17220}}; + + // Check concatenate results uint64_t i = 0; while (row.size() != 0) { - i++; - auto image = row["image"]; - auto image_copy = row["image_copy"]; - MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); - EXPECT_MSTENSOR_EQ(image, image_copy); + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); ASSERT_OK(iter->GetNextRow(&row)); + i++; } - EXPECT_EQ(i, 10); + EXPECT_EQ(i, 4); // Manually terminate the pipeline iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessInt) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessInt."; +TEST_F(MindDataTestPipeline, TestConcatenateSuccess3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess3."; + // Test concatenate of string - // Create a RandomDataset with Int32 numbers for given shape - u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(864); - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {6})); - std::shared_ptr ds = RandomData(5, schema); + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/1.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); EXPECT_NE(ds, nullptr); - ds = ds->SetNumWorkers(3); + + // Create Take operation on 
ds + ds = ds->Take(1); EXPECT_NE(ds, nullptr); - // Create Fill op - to fill with 3 - std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar(3, &fill_value_tensor)); - mindspore::MSTensor fill_value_MSTensor = - mindspore::MSTensor(std::make_shared(fill_value_tensor)); - transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"col1"}); + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create Concatenate op + std::vector prepend_vector = {"1", "2"}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + std::vector append_vector = {"3"}; + std::shared_ptr append_tensor; + ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor)); + mindspore::MSTensor append_MSTensor = + mindspore::MSTensor(std::make_shared(append_tensor)); + + transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor, append_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"text"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -272,50 +305,59 @@ TEST_F(MindDataTestPipeline, TestFillSuccessInt) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector> expected = { - {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}}; + std::vector> expected = {{"1", "2", "welcome", "to", "beijing", "!", "3"}}; + // Check concatenate results uint64_t i = 0; while (row.size() != 0) { - auto ind = row["col1"]; - TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + auto ind = row["text"]; std::shared_ptr de_expected_tensor; 
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); mindspore::MSTensor expected_tensor = mindspore::MSTensor(std::make_shared(de_expected_tensor)); EXPECT_MSTENSOR_EQ(ind, expected_tensor); - ASSERT_OK(iter->GetNextRow(&row)); i++; } - EXPECT_EQ(i, 5); + EXPECT_EQ(i, 1); // Manually terminate the pipeline iter->Stop(); - GlobalContext::config_manager()->set_seed(curr_seed); + // GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessBool) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessBool."; +TEST_F(MindDataTestPipeline, TestConcatenateSuccess4) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess4."; + // Test concatenate with negative axis - // Create a RandomDataset with bool values for given shape + // Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(963); + GlobalContext::config_manager()->set_seed(246); std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeBool, {4})); - std::shared_ptr ds = RandomData(3, schema); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(4, schema); EXPECT_NE(ds, nullptr); ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create Fill op - to fill with zero - std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar((bool)true, &fill_value_tensor)); - mindspore::MSTensor fill_value_MSTensor = - mindspore::MSTensor(std::make_shared(fill_value_tensor)); - transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"col1"}); + // Create Concatenate op + std::vector prepend_vector = {1, 2}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + 
std::vector append_vector = {3}; + std::shared_ptr append_tensor; + ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor)); + mindspore::MSTensor append_MSTensor = + mindspore::MSTensor(std::make_shared(append_tensor)); + + transforms::Concatenate concatenate = transforms::Concatenate(-1, prepend_MSTensor, append_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -327,105 +369,109 @@ TEST_F(MindDataTestPipeline, TestFillSuccessBool) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector> expected = { - {true, true, true, true}, {true, true, true, true}, {true, true, true, true}}; + std::vector> expected = { + {1, 2, 31354, 3}, {1, 2, -5655, 3}, {1, 2, -17734, 3}, {1, 2, -17220, 3}}; + // Check concatenate results uint64_t i = 0; while (row.size() != 0) { auto ind = row["col1"]; - TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); std::shared_ptr de_expected_tensor; ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); mindspore::MSTensor expected_tensor = mindspore::MSTensor(std::make_shared(de_expected_tensor)); EXPECT_MSTENSOR_EQ(ind, expected_tensor); - ASSERT_OK(iter->GetNextRow(&row)); i++; } - EXPECT_EQ(i, 3); + EXPECT_EQ(i, 4); // Manually terminate the pipeline iter->Stop(); GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast."; +TEST_F(MindDataTestPipeline, TestConcatenateFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateFail1."; + // Test concatenate with type mismatch - // Create a RandomDataset with UInt8 numbers for given shape + // Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(963); + GlobalContext::config_manager()->set_seed(246); std::shared_ptr 
schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4})); - std::shared_ptr ds = RandomData(3, schema); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(1, schema); EXPECT_NE(ds, nullptr); - ds = ds->SetNumWorkers(2); + ds = ds->SetNumWorkers(1); EXPECT_NE(ds, nullptr); - // Create Fill op - to fill with -3 - std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor)); - mindspore::MSTensor fill_value_MSTensor = - mindspore::MSTensor(std::make_shared(fill_value_tensor)); - transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"col1"}); + // Create Concatenate op + std::vector prepend_vector = {"1", "2"}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + std::vector append_vector = {"3"}; + std::shared_ptr append_tensor; + ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor)); + mindspore::MSTensor append_MSTensor = + mindspore::MSTensor(std::make_shared(append_tensor)); + + transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor, append_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset // This will trigger the creation of the Execution Tree and launch it. 
std::shared_ptr iter = ds->CreateIterator(); + // EXPECT_EQ(iter, nullptr); EXPECT_NE(iter, nullptr); // Iterate the dataset and get each row std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - // Note: 2**8 -3 = 256 -3 = 253 - std::vector> expected = {{253, 253, 253, 253}, {253, 253, 253, 253}, {253, 253, 253, 253}}; - - uint64_t i = 0; - while (row.size() != 0) { - auto ind = row["col1"]; - TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); - std::shared_ptr de_expected_tensor; - ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); - mindspore::MSTensor expected_tensor = - mindspore::MSTensor(std::make_shared(de_expected_tensor)); - EXPECT_MSTENSOR_EQ(ind, expected_tensor); - - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - - EXPECT_EQ(i, 3); + // Expect failure: type mismatch, concatenate string tensor to dataset of Int16 + EXPECT_ERROR(iter->GetNextRow(&row)); // Manually terminate the pipeline iter->Stop(); GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecastZero) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecastZero."; +TEST_F(MindDataTestPipeline, TestConcatenateFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateFail2."; + // Test concatenate with incorrect dimension - // Create a RandomDataset with UInt8 numbers for given shape + // Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(963); + GlobalContext::config_manager()->set_seed(246); std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4})); - std::shared_ptr ds = RandomData(3, schema); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1, 2})); + std::shared_ptr ds = RandomData(1, schema); EXPECT_NE(ds, nullptr); - ds = ds->SetNumWorkers(2); + ds = ds->SetNumWorkers(1); EXPECT_NE(ds, nullptr); - // Create Fill 
op - to fill with zero - std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor)); - mindspore::MSTensor fill_value_MSTensor = - mindspore::MSTensor(std::make_shared(fill_value_tensor)); - transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"col1"}); + // Create Concatenate op + std::vector prepend_vector = {1, 2}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + std::vector append_vector = {3}; + std::shared_ptr append_tensor; + ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor)); + mindspore::MSTensor append_MSTensor = + mindspore::MSTensor(std::make_shared(append_tensor)); + + transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor, append_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -435,104 +481,112 @@ TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecastZero) { // Iterate the dataset and get each row std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - // Note: 2**8 = 256 - std::vector> expected = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}; - - uint64_t i = 0; - while (row.size() != 0) { - auto ind = row["col1"]; - TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); - std::shared_ptr de_expected_tensor; - ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); - mindspore::MSTensor expected_tensor = - mindspore::MSTensor(std::make_shared(de_expected_tensor)); - EXPECT_MSTENSOR_EQ(ind, expected_tensor); - - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - - EXPECT_EQ(i, 3); + // Expect failure: concatenate on 2D dataset, only support 1D concatenate so far + EXPECT_ERROR(iter->GetNextRow(&row)); // Manually terminate the pipeline iter->Stop(); 
GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast16) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast16."; +TEST_F(MindDataTestPipeline, TestConcatenateFail3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateFail3."; + // Test concatenate with wrong axis - // Create a RandomDataset with UInt16 numbers for given shape + // Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(963); + GlobalContext::config_manager()->set_seed(246); std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeUInt16, {4})); - std::shared_ptr ds = RandomData(3, schema); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(1, schema); EXPECT_NE(ds, nullptr); - ds = ds->SetNumWorkers(2); + ds = ds->SetNumWorkers(1); EXPECT_NE(ds, nullptr); - // Create Fill op - to fill with -3 - std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor)); - mindspore::MSTensor fill_value_MSTensor = - mindspore::MSTensor(std::make_shared(fill_value_tensor)); - transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"col1"}); + // Create Concatenate op + std::vector prepend_vector = {1, 2}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + std::vector append_vector = {3}; + std::shared_ptr append_tensor; + ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor)); + mindspore::MSTensor append_MSTensor = + mindspore::MSTensor(std::make_shared(append_tensor)); + // The parameter axis support 0 or -1 only for now + transforms::Concatenate concatenate = transforms::Concatenate(2, prepend_MSTensor, 
append_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset // This will trigger the creation of the Execution Tree and launch it. std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(iter, nullptr); + // Expect failure: wrong axis, axis can only be 0 or -1 + EXPECT_EQ(iter, nullptr); + + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestDuplicateSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDuplicateSuccess."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + transforms::Duplicate duplicate = transforms::Duplicate(); + + // Create a Map operation on ds + ds = ds->Map({duplicate}, {"image"}, {"image", "image_copy"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); // Iterate the dataset and get each row std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - // Note: 2**16 -3 = 65536 -3 = 65533 - std::vector> expected = { - {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}}; - uint64_t i = 0; while (row.size() != 0) { - auto ind = row["col1"]; - TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); - std::shared_ptr de_expected_tensor; - ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); - mindspore::MSTensor expected_tensor = - mindspore::MSTensor(std::make_shared(de_expected_tensor)); - EXPECT_MSTENSOR_EQ(ind, expected_tensor); - - ASSERT_OK(iter->GetNextRow(&row)); i++; + auto image = row["image"]; + auto image_copy = row["image_copy"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + EXPECT_MSTENSOR_EQ(image, image_copy); + ASSERT_OK(iter->GetNextRow(&row)); } - EXPECT_EQ(i, 3); + EXPECT_EQ(i, 10); // Manually terminate the pipeline iter->Stop(); - GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessUpTypecast) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessUpTypecast."; +TEST_F(MindDataTestPipeline, TestFillSuccessInt) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessInt."; - // Create a RandomDataset with Float numbers for given shape + // Create a RandomDataset with Int32 numbers for given shape u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(963); + GlobalContext::config_manager()->set_seed(864); std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeFloat32, {2})); - std::shared_ptr ds = RandomData((float)4.0, schema); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {6})); + std::shared_ptr ds = RandomData(5, schema); EXPECT_NE(ds, nullptr); - ds = 
ds->SetNumWorkers(2); + ds = ds->SetNumWorkers(3); EXPECT_NE(ds, nullptr); - // Create Fill op - to fill with zeroes + // Create Fill op - to fill with 3 std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor)); + ASSERT_OK(Tensor::CreateScalar(3, &fill_value_tensor)); mindspore::MSTensor fill_value_MSTensor = mindspore::MSTensor(std::make_shared(fill_value_tensor)); transforms::Fill mask = transforms::Fill(fill_value_MSTensor); @@ -548,7 +602,8 @@ TEST_F(MindDataTestPipeline, TestFillSuccessUpTypecast) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector> expected = {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}; + std::vector> expected = { + {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}}; uint64_t i = 0; while (row.size() != 0) { @@ -564,40 +619,33 @@ TEST_F(MindDataTestPipeline, TestFillSuccessUpTypecast) { i++; } - EXPECT_EQ(i, 4); + EXPECT_EQ(i, 5); // Manually terminate the pipeline iter->Stop(); GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillSuccessString) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessString."; - - // Create a TextFile dataset - std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt"; - std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); - EXPECT_NE(ds, nullptr); +TEST_F(MindDataTestPipeline, TestFillSuccessBool) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessBool."; - // Create Skip operation on ds - ds = ds->Skip(6); + // Create a RandomDataset with bool values for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeBool, {4})); + std::shared_ptr ds = RandomData(3, schema); EXPECT_NE(ds, nullptr); - - // Create BasicTokenizer 
operation on ds - std::shared_ptr basic_tokenizer = std::make_shared(true); - EXPECT_NE(basic_tokenizer, nullptr); - - // Create Map operation on ds - ds = ds->Map({basic_tokenizer}, {"text"}); + ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create Fill op - to fill with string + // Create Fill op - to fill with true std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateScalar("Hello", &fill_value_tensor)); + ASSERT_OK(Tensor::CreateScalar((bool)true, &fill_value_tensor)); mindspore::MSTensor fill_value_MSTensor = mindspore::MSTensor(std::make_shared(fill_value_tensor)); transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"text"}); + ds = ds->Map({mask}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -609,85 +657,103 @@ TEST_F(MindDataTestPipeline, TestFillSuccessString) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector expected = {"Hello", "Hello", "Hello", "Hello", "Hello"}; - std::shared_ptr de_expected_tensor; - ASSERT_OK(Tensor::CreateFromVector(expected, &de_expected_tensor)); - mindspore::MSTensor expected_tensor = - mindspore::MSTensor(std::make_shared(de_expected_tensor)); + std::vector> expected = { + {true, true, true, true}, {true, true, true, true}, {true, true, true, true}}; uint64_t i = 0; while (row.size() != 0) { - auto ind = row["text"]; + auto ind = row["col1"]; TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); EXPECT_MSTENSOR_EQ(ind, expected_tensor); ASSERT_OK(iter->GetNextRow(&row)); i++; } - EXPECT_EQ(i, 1); + EXPECT_EQ(i, 3); // Manually terminate the pipeline iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestFillFailFillValueNotScalar) { - MS_LOG(INFO) << 
"Doing MindDataTestPipeline-TestFillFailFillValueNotScalar."; - // Test BasicTokenizer with lower_case true - - // Create a TextFile dataset - std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt"; - std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); - EXPECT_NE(ds, nullptr); +TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast."; - // Create Skip operation on ds - ds = ds->Skip(6); + // Create a RandomDataset with UInt8 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4})); + std::shared_ptr ds = RandomData(3, schema); EXPECT_NE(ds, nullptr); - - // Create BasicTokenizer operation on ds - std::shared_ptr basic_tokenizer = std::make_shared(true); - EXPECT_NE(basic_tokenizer, nullptr); - - // Create Map operation on ds - ds = ds->Map({basic_tokenizer}, {"text"}); + ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create Fill op - with wrongful vector shape instead of scalar - std::vector fill_string = {"ERROR"}; + // Create Fill op - to fill with -3 std::shared_ptr fill_value_tensor; - ASSERT_OK(Tensor::CreateFromVector(fill_string, &fill_value_tensor)); + ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor)); mindspore::MSTensor fill_value_MSTensor = mindspore::MSTensor(std::make_shared(fill_value_tensor)); transforms::Fill mask = transforms::Fill(fill_value_MSTensor); - ds = ds->Map({mask}, {"text"}); + ds = ds->Map({mask}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset // This will trigger the creation of the Execution Tree and launch it. 
std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); - // Expect failure: invalid Fill parameter (the shape of fill_value is not a scalar) - EXPECT_EQ(iter, nullptr); + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + // Note: 2**8 -3 = 256 -3 = 253 + std::vector> expected = {{253, 253, 253, 253}, {253, 253, 253, 253}, {253, 253, 253, 253}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestMaskSuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess."; +TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecastZero) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecastZero."; - // Create a RandomDataset + // Create a RandomDataset with UInt8 numbers for given shape u_int32_t curr_seed = GlobalContext::config_manager()->seed(); - GlobalContext::config_manager()->set_seed(246); + GlobalContext::config_manager()->set_seed(963); std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4})); - std::shared_ptr ds = RandomData(4, schema); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4})); + std::shared_ptr ds = RandomData(3, schema); EXPECT_NE(ds, nullptr); ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create Mask op - std::shared_ptr constant_tensor; - ASSERT_OK(Tensor::CreateScalar(0, &constant_tensor)); - mindspore::MSTensor 
constant_MSTensor = - mindspore::MSTensor(std::make_shared(constant_tensor)); - transforms::Mask mask = transforms::Mask(RelationalOp::kGreater, constant_MSTensor); + // Create Fill op - to fill with zero + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); ds = ds->Map({mask}, {"col1"}); EXPECT_NE(ds, nullptr); @@ -700,12 +766,13 @@ TEST_F(MindDataTestPipeline, TestMaskSuccess) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector> expected = { - {true, true, true, true}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}; + // Note: 2**8 = 256 + std::vector> expected = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}; uint64_t i = 0; while (row.size() != 0) { auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); std::shared_ptr de_expected_tensor; ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); mindspore::MSTensor expected_tensor = @@ -715,46 +782,33 @@ TEST_F(MindDataTestPipeline, TestMaskSuccess) { i++; } - EXPECT_EQ(i, 4); + EXPECT_EQ(i, 3); // Manually terminate the pipeline iter->Stop(); GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestOneHotSuccess1) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotSuccess1."; - // Testing CutMixBatch on a batch of CHW images - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - int number_of_classes = 10; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); - EXPECT_NE(ds, nullptr); - - // Create objects for the tensor ops - std::shared_ptr hwc_to_chw = std::make_shared(); - - // Create a Map operation on ds - ds = ds->Map({hwc_to_chw}, {"image"}); - EXPECT_NE(ds, nullptr); 
+TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast16) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast16."; - // Create a Batch operation on ds - int32_t batch_size = 5; - ds = ds->Batch(batch_size); + // Create a RandomDataset with UInt16 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeUInt16, {4})); + std::shared_ptr ds = RandomData(3, schema); EXPECT_NE(ds, nullptr); - - // Create objects for the tensor ops - std::shared_ptr one_hot_op = std::make_shared(number_of_classes); - - // Create a Map operation on ds - ds = ds->Map({one_hot_op}, {"label"}); + ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - std::shared_ptr cutmix_batch_op = - std::make_shared(mindspore::dataset::ImageBatchFormat::kNCHW, 1.0, 1.0); - - // Create a Map operation on ds - ds = ds->Map({cutmix_batch_op}, {"image", "label"}); + // Create Fill op - to fill with -3 + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -766,50 +820,1071 @@ TEST_F(MindDataTestPipeline, TestOneHotSuccess1) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); + // Note: 2**16 -3 = 65536 -3 = 65533 + std::vector> expected = { + {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}}; + uint64_t i = 0; while (row.size() != 0) { - i++; - auto image = row["image"]; - auto label = row["label"]; + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + 
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessUpTypecast) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessUpTypecast."; + + // Create a RandomDataset with Float numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeFloat32, {2})); + std::shared_ptr ds = RandomData((float)4.0, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with zeroes + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessString) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessString."; + + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create Skip operation on ds + ds = ds->Skip(6); + EXPECT_NE(ds, nullptr); + + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with string + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar("Hello", &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and 
launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {"Hello", "Hello", "Hello", "Hello", "Hello"}; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected, &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["text"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestFillFailFillValueNotScalar) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillFailFillValueNotScalar."; + // Test BasicTokenizer with lower_case true + + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create Skip operation on ds + ds = ds->Skip(6); + EXPECT_NE(ds, nullptr); + + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create Fill op - with wrongful vector shape instead of scalar + std::vector fill_string = {"ERROR"}; + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateFromVector(fill_string, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of 
the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + + // Expect failure: invalid Fill parameter (the shape of fill_value is not a scalar) + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestMaskSuccess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess1."; + // Test Mask random int dataset with int + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create an int Mask op + std::shared_ptr constant_tensor; + ASSERT_OK(Tensor::CreateScalar(0, &constant_tensor)); + mindspore::MSTensor constant_MSTensor = + mindspore::MSTensor(std::make_shared(constant_tensor)); + transforms::Mask mask = transforms::Mask(RelationalOp::kGreater, constant_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = { + {true, true, true, true}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestMaskSuccess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess2."; + // Test Mask random float dataset with float + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeFloat16, {4})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create a float Mask op + std::shared_ptr constant_tensor; + ASSERT_OK(Tensor::CreateScalar(-1.1, &constant_tensor)); + mindspore::MSTensor constant_MSTensor = + mindspore::MSTensor(std::make_shared(constant_tensor)); + // Use explicit input ms_type(kNumberTypeBool) as the mask return type + transforms::Mask mask = + transforms::Mask(RelationalOp::kLessEqual, constant_MSTensor, mindspore::DataType::kNumberTypeBool); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the 
Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = { + {false, false, false, false}, {true, true, true, true}, {false, false, false, false}, {true, true, true, true}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + + // Test Mask result boolean dataset with boolean + + // Create another boolean Mask op + std::shared_ptr constant_tensor2; + ASSERT_OK(Tensor::CreateScalar(false, &constant_tensor2)); + mindspore::MSTensor constant_MSTensor2 = + mindspore::MSTensor(std::make_shared(constant_tensor2)); + transforms::Mask mask2 = transforms::Mask(RelationalOp::kLessEqual, constant_MSTensor2); + ds = ds->Map({mask2}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected2 = { + {true, true, true, true}, {false, false, false, false}, {true, true, true, true}, {false, false, false, false}}; + + i = 0; + while (row.size() != 0) { + auto ind2 = row["col1"]; + std::shared_ptr de_expected_tensor2; + ASSERT_OK(Tensor::CreateFromVector(expected2[i], &de_expected_tensor2)); + mindspore::MSTensor expected_tensor2 = + mindspore::MSTensor(std::make_shared(de_expected_tensor2)); + EXPECT_MSTENSOR_EQ(ind2, expected_tensor2); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestMaskSuccess3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess3."; + // Test Mask random text dataset with string + + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/1.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create Take operation on ds + ds = ds->Take(1); + EXPECT_NE(ds, nullptr); + + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create a string Mask op + std::shared_ptr constant_tensor; + ASSERT_OK(Tensor::CreateScalar("to", &constant_tensor)); + mindspore::MSTensor constant_MSTensor = + mindspore::MSTensor(std::make_shared(constant_tensor)); + // Use kNumberTypeInt16 as an explicit ms_type parameter for the mask return type, + // instead of using default kNumberTypeBool. 
+ transforms::Mask mask = + transforms::Mask(RelationalOp::kEqual, constant_MSTensor, mindspore::DataType::kNumberTypeInt16); + ds = ds->Map({mask}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{0, 1, 0, 0}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["text"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestMaskFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskFail1."; + // Test Mask with non-numeric datatype as output result. 
+ + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4})); + std::shared_ptr ds = RandomData(1, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(1); + EXPECT_NE(ds, nullptr); + + // Create an int Mask op + std::shared_ptr constant_tensor; + ASSERT_OK(Tensor::CreateScalar(0, &constant_tensor)); + mindspore::MSTensor constant_MSTensor = + mindspore::MSTensor(std::make_shared(constant_tensor)); + transforms::Mask mask = + transforms::Mask(RelationalOp::kGreater, constant_MSTensor, mindspore::DataType::kObjectTypeString); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: using string as output datatype which is invalid + EXPECT_EQ(iter, nullptr); + + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestMaskFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskFail2."; + // Test Mask with mismatched datatype. 
+ + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4})); + std::shared_ptr ds = RandomData(1, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(1); + EXPECT_NE(ds, nullptr); + + // Create a string Mask op + std::shared_ptr constant_tensor; + ASSERT_OK(Tensor::CreateScalar("0", &constant_tensor)); + mindspore::MSTensor constant_MSTensor = + mindspore::MSTensor(std::make_shared(constant_tensor)); + transforms::Mask mask = transforms::Mask(RelationalOp::kGreater, constant_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + // Expect failure: mismatched datatype, mask Int16 with string + EXPECT_ERROR(iter->GetNextRow(&row)); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestOneHotSuccess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotSuccess1."; + // Testing CutMixBatch on a batch of CHW images + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + int number_of_classes = 10; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr hwc_to_chw = std::make_shared(); + + // Create a Map operation on ds + ds = ds->Map({hwc_to_chw}, {"image"}); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 5; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // 
Create objects for the tensor ops + std::shared_ptr one_hot_op = std::make_shared(number_of_classes); + + // Create a Map operation on ds + ds = ds->Map({one_hot_op}, {"label"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr cutmix_batch_op = + std::make_shared(mindspore::dataset::ImageBatchFormat::kNCHW, 1.0, 1.0); + + // Create a Map operation on ds + ds = ds->Map({cutmix_batch_op}, {"image", "label"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + auto label = row["label"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + MS_LOG(INFO) << "Label shape: " << label.Shape(); + EXPECT_EQ(image.Shape().size() == 4 && batch_size == image.Shape()[0] && 3 == image.Shape()[1] && + 32 == image.Shape()[2] && 32 == image.Shape()[3], + true); + EXPECT_EQ(label.Shape().size() == 2 && batch_size == label.Shape()[0] && number_of_classes == label.Shape()[1], + true); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 2); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestOneHotSuccess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotSuccess2."; + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 5; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr one_hot_op = std::make_shared(10); + + // Create a Map operation on ds + ds = ds->Map({one_hot_op}, 
{"label"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr mixup_batch_op = std::make_shared(2.0); + + // Create a Map operation on ds + ds = ds->Map({mixup_batch_op}, {"image", "label"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 2); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestOneHotFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotFail1 with invalid params."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // incorrect num_class + std::shared_ptr one_hot_op = std::make_shared(0); + + // Create a Map operation on ds + ds = ds->Map({one_hot_op}, {"label"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: invalid OneHot input + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestOneHotFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotFail2 with invalid params."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // incorrect num_class + std::shared_ptr one_hot_op = std::make_shared(-5); + + // Create a Map operation on ds + ds = ds->Map({one_hot_op}, {"label"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter 
= ds->CreateIterator(); + // Expect failure: invalid OneHot input + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestPadEndSuccess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess1."; + // Test PadEnd basic with int as pad_value + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create PadEnd op + std::shared_ptr pad_value; + ASSERT_OK(Tensor::CreateScalar(0, &pad_value)); + mindspore::MSTensor pad_value_MSTensor = + mindspore::MSTensor(std::make_shared(pad_value)); + + transforms::PadEnd pad_end = transforms::PadEnd({3}, pad_value_MSTensor); + ds = ds->Map({pad_end}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{31354, 0, 0}, {-5655, 0, 0}, {-17734, 0, 0}, {-17220, 0, 0}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestPadEndSuccess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess2."; + // Test PadEnd with pad_shape equals to current shape, nothing padded + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {2})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create PadEnd op + std::shared_ptr pad_value; + ASSERT_OK(Tensor::CreateScalar(0, &pad_value)); + mindspore::MSTensor pad_value_MSTensor = + mindspore::MSTensor(std::make_shared(pad_value)); + + transforms::PadEnd pad_end = transforms::PadEnd({2}, pad_value_MSTensor); + ds = ds->Map({pad_end}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{31354, 31354}, {-5655, -5655}, {-17734, -17734}, {-17220, -17220}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestPadEndSuccess3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess3."; + // Test PadEnd without pad_value (using default pad_value) + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create PadEnd op + transforms::PadEnd pad_end = transforms::PadEnd({3}); + ds = ds->Map({pad_end}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{31354, 0, 0}, {-5655, 0, 0}, {-17734, 0, 0}, {-17220, 0, 0}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestPadEndSuccess4) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess4."; + // Test PadEnd with pad_shape less than current shape, will truncate the values + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create PadEnd op + std::shared_ptr pad_value; + ASSERT_OK(Tensor::CreateScalar(0, &pad_value)); + mindspore::MSTensor pad_value_MSTensor = + mindspore::MSTensor(std::make_shared(pad_value)); + + transforms::PadEnd pad_end = transforms::PadEnd({2}, pad_value_MSTensor); + ds = ds->Map({pad_end}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{31354, 31354}, {-5655, -5655}, {-17734, -17734}, {-17220, -17220}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestPadEndSuccess5) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess5."; + // Test PadEnd with string as pad_value + + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/1.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create Take operation on ds + ds = ds->Take(1); + EXPECT_NE(ds, nullptr); + + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create PadEnd op + std::shared_ptr pad_value; + ASSERT_OK(Tensor::CreateScalar("pad_string", &pad_value)); + mindspore::MSTensor pad_value_MSTensor = + mindspore::MSTensor(std::make_shared(pad_value)); + + transforms::PadEnd pad_end = transforms::PadEnd({5}, pad_value_MSTensor); + ds = ds->Map({pad_end}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{"welcome", "to", "beijing", "!", "pad_string"}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["text"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestPadEndFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndFail."; + // Test PadEnd with type mismatch, source and pad_value are not of the same type. + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(1, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(1); + EXPECT_NE(ds, nullptr); + + // Create PadEnd op + std::shared_ptr pad_value; + ASSERT_OK(Tensor::CreateScalar("0", &pad_value)); + mindspore::MSTensor pad_value_MSTensor = + mindspore::MSTensor(std::make_shared(pad_value)); + + transforms::PadEnd pad_end = transforms::PadEnd({3}, pad_value_MSTensor); + ds = ds->Map({pad_end}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + // Expect failure: type mismatch, pad a string to Int16 dataset + EXPECT_ERROR(iter->GetNextRow(&row)); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestRandomApplySuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplySuccess."; + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, std::make_shared(false, 5)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + auto resize_op = vision::Resize({777, 777}); + auto random_apply = transforms::RandomApply({resize_op}, 0.8); + + // Create a Map operation on ds + ds = ds->Map({random_apply}, {"image"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + auto label = row["label"]; MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); MS_LOG(INFO) << "Label shape: " << label.Shape(); - EXPECT_EQ(image.Shape().size() == 4 && batch_size == image.Shape()[0] && 3 == image.Shape()[1] && - 32 == image.Shape()[2] && 32 == image.Shape()[3], - true); - EXPECT_EQ(label.Shape().size() == 2 && batch_size == label.Shape()[0] && number_of_classes == label.Shape()[1], - true); ASSERT_OK(iter->GetNextRow(&row)); } - EXPECT_EQ(i, 2); + EXPECT_EQ(i, 5); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomApplyFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail1 with invalid transform."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // Resize: Non-positive size value: -1 at element: 0 + // RandomApply: transform ops must not be null + auto decode_op = vision::Decode(); + auto resize_op = vision::Resize({-1}); + auto random_apply = transforms::RandomApply({decode_op, resize_op}); + + // Create a Map operation on ds + ds = ds->Map({random_apply}, {"image"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: invalid RandomApply parameter (transform ops must not be null) + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestRandomApplyFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail2 with invalid transform."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, 
"all", std::make_shared(false, 10)); + EXPECT_NE(ds, nullptr); + + // RandomApply: transform ops must not be null + std::shared_ptr decode_op = std::make_shared(); + std::shared_ptr random_apply(new transforms::RandomApply({decode_op, nullptr})); - // Manually terminate the pipeline - iter->Stop(); + // Create a Map operation on ds + ds = ds->Map({random_apply}, {"image"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: invalid RandomApply parameter (transform ops must not be null) + EXPECT_EQ(iter, nullptr); } -TEST_F(MindDataTestPipeline, TestOneHotSuccess2) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotSuccess2."; +TEST_F(MindDataTestPipeline, TestRandomApplyFail3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail3 with invalid transform."; + // Create a Cifar10 Dataset std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); EXPECT_NE(ds, nullptr); - // Create a Batch operation on ds - int32_t batch_size = 5; - ds = ds->Batch(batch_size); + // RandomApply: Probability has to be between 0 and 1 + auto resize_op = vision::Resize({100}); + auto random_apply = transforms::RandomApply({resize_op}, -1); + + // Create a Map operation on ds + ds = ds->Map({random_apply}, {"image"}); EXPECT_NE(ds, nullptr); - // Create objects for the tensor ops - std::shared_ptr one_hot_op = std::make_shared(10); + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: invalid RandomApply parameter (Probability has to be between 0 and 1) + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestRandomApplyFail4) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail4 with invalid transform."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + 
EXPECT_NE(ds, nullptr); + + // RandomApply: transform list must not be empty + std::vector> list = {}; + auto random_apply = transforms::RandomApply(list); // Create a Map operation on ds - ds = ds->Map({one_hot_op}, {"label"}); + ds = ds->Map({random_apply}, {"image"}); EXPECT_NE(ds, nullptr); - std::shared_ptr mixup_batch_op = std::make_shared(2.0); + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: invalid RandomApply parameter (transform list must not be empty) + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestRandomChoiceSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceSuccess."; + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, std::make_shared(false, 3)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op1(new vision::Resize({777, 777})); + std::shared_ptr resize_op2(new vision::Resize({888, 888})); + auto random_choice = transforms::RandomChoice({resize_op1, resize_op2}); // Create a Map operation on ds - ds = ds->Map({mixup_batch_op}, {"image", "label"}); + ds = ds->Map({random_choice}, {"image"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -825,58 +1900,87 @@ TEST_F(MindDataTestPipeline, TestOneHotSuccess2) { while (row.size() != 0) { i++; auto image = row["image"]; + auto label = row["label"]; MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + MS_LOG(INFO) << "Label shape: " << label.Shape(); ASSERT_OK(iter->GetNextRow(&row)); } - EXPECT_EQ(i, 2); + EXPECT_EQ(i, 3); // Manually terminate the pipeline iter->Stop(); } -TEST_F(MindDataTestPipeline, TestOneHotFail1) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotFail1 with invalid params."; +TEST_F(MindDataTestPipeline, TestRandomChoiceFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceFail1 with invalid transform."; + + 
// Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + RandomSampler sampler = RandomSampler(false, 10); + std::shared_ptr ds = Cifar10(folder_path, "all", sampler); + EXPECT_NE(ds, nullptr); + + // Resize: Non-positive size value: -1 at element: 0 + // RandomChoice: transform ops must not be null + auto decode_op = vision::Decode(); + auto resize_op = vision::Resize({-1}); + auto random_choice = transforms::RandomChoice({decode_op, resize_op}); + + // Create a Map operation on ds + ds = ds->Map({random_choice}, {"image"}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure: invalid RandomApply parameter (transform ops must not be null) + EXPECT_EQ(iter, nullptr); +} + +TEST_F(MindDataTestPipeline, TestRandomChoiceFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceFail2 with invalid transform."; // Create a Cifar10 Dataset std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); EXPECT_NE(ds, nullptr); - // incorrect num_class - std::shared_ptr one_hot_op = std::make_shared(0); + // RandomChoice: transform ops must not be null + std::shared_ptr decode_op = std::make_shared(); + std::shared_ptr random_choice(new transforms::RandomApply({decode_op, nullptr})); // Create a Map operation on ds - ds = ds->Map({one_hot_op}, {"label"}); + ds = ds->Map({random_choice}, {"image"}); EXPECT_NE(ds, nullptr); std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid OneHot input + // Expect failure: invalid RandomApply parameter (transform ops must not be null) EXPECT_EQ(iter, nullptr); } -TEST_F(MindDataTestPipeline, TestOneHotFail2) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotFail2 with invalid params."; +TEST_F(MindDataTestPipeline, TestRandomChoiceFail3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceFail3 with invalid 
transform."; // Create a Cifar10 Dataset std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); EXPECT_NE(ds, nullptr); - // incorrect num_class - std::shared_ptr one_hot_op = std::make_shared(-5); + // RandomChoice: transform list must not be empty + std::vector> list = {}; + auto random_choice = transforms::RandomChoice(list); // Create a Map operation on ds - ds = ds->Map({one_hot_op}, {"label"}); + ds = ds->Map({random_choice}, {"image"}); EXPECT_NE(ds, nullptr); std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid OneHot input + // Expect failure: invalid RandomApply parameter (transform list must not be empty) EXPECT_EQ(iter, nullptr); } -TEST_F(MindDataTestPipeline, TestPadEndSuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess."; +TEST_F(MindDataTestPipeline, TestSliceSuccess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess1."; + // Test Slice int with user defined slice object. 
// Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); @@ -888,14 +1992,153 @@ TEST_F(MindDataTestPipeline, TestPadEndSuccess) { ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create PadEnd op - std::shared_ptr pad_value; - ASSERT_OK(Tensor::CreateScalar(0, &pad_value)); - mindspore::MSTensor pad_value_MSTensor = - mindspore::MSTensor(std::make_shared(pad_value)); + // Create concatenate op + std::vector prepend_vector = {1, 2, 3}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); - transforms::PadEnd pad_end = transforms::PadEnd({3}, pad_value_MSTensor); - ds = ds->Map({pad_end}, {"col1"}); + transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Apply Slice op on ds, get the first and third elements in each row. + SliceOption slice_option = SliceOption(Slice(0, 3, 2)); + transforms::Slice slice = transforms::Slice({slice_option}); + ds = ds->Map({slice}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = {{1, 3}, {1, 3}, {1, 3}, {1, 3}}; + + // Check slice results + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestSliceSuccess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess2."; + // Test Slice int with bool true (slice all). + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create concatenate op + std::vector prepend_vector = {1, 2, 3}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Apply Slice op on ds, get the first and third elements in each row. 
+ SliceOption slice_option = SliceOption(true); + transforms::Slice slice = transforms::Slice({slice_option}); + ds = ds->Map({slice}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector> expected = { + {1, 2, 3, 31354}, {1, 2, 3, -5655}, {1, 2, 3, -17734}, {1, 2, 3, -17220}}; + + // Check slice results + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestSliceSuccess3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess3."; + // Test Slice int with list of indices including negative. 
+ + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create concatenate op + std::vector prepend_vector = {1, 2, 3}; + std::shared_ptr prepend_tensor; + ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); + mindspore::MSTensor prepend_MSTensor = + mindspore::MSTensor(std::make_shared(prepend_tensor)); + + transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor); + + // Create a Map operation on ds + ds = ds->Map({concatenate}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Apply Slice op on ds, get the first and third elements in each row. + std::vector indices = {-1, 2}; + SliceOption slice_option = SliceOption(indices); + transforms::Slice slice = transforms::Slice({slice_option}); + ds = ds->Map({slice}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -907,8 +2150,9 @@ TEST_F(MindDataTestPipeline, TestPadEndSuccess) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector> expected = {{31354, 0, 0}, {-5655, 0, 0}, {-17734, 0, 0}, {-17220, 0, 0}}; + std::vector> expected = {{31354, 3}, {-5655, 3}, {-17734, 3}, {-17220, 3}}; + // Check slice results uint64_t i = 0; while (row.size() != 0) { auto ind = row["col1"]; @@ -928,20 +2172,32 @@ TEST_F(MindDataTestPipeline, TestPadEndSuccess) { GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestRandomApplySuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplySuccess."; +TEST_F(MindDataTestPipeline, TestSliceSuccess4) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess4."; + // Test Slice string with list of 
indices. - // Create an ImageFolder Dataset - std::string folder_path = datasets_root_path_ + "/testPK/data/"; - std::shared_ptr ds = ImageFolder(folder_path, true, std::make_shared(false, 5)); + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/1.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); EXPECT_NE(ds, nullptr); - // Create objects for the tensor ops - auto resize_op = vision::Resize({777, 777}); - auto random_apply = transforms::RandomApply({resize_op}, 0.8); + // Create Take operation on ds + ds = ds->Take(1); + EXPECT_NE(ds, nullptr); - // Create a Map operation on ds - ds = ds->Map({random_apply}, {"image"}); + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Apply Slice op on ds, get the first and third elements in each row. 
+ std::vector indices = {-1, -2, 1, 0}; + SliceOption slice_option = SliceOption(indices); + transforms::Slice slice = transforms::Slice({slice_option}); + ds = ds->Map({slice}, {"text"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -953,123 +2209,49 @@ TEST_F(MindDataTestPipeline, TestRandomApplySuccess) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); + std::vector> expected = {{"!", "beijing", "to", "welcome"}}; + + // Check slice results uint64_t i = 0; while (row.size() != 0) { - i++; - auto image = row["image"]; - auto label = row["label"]; - MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); - MS_LOG(INFO) << "Label shape: " << label.Shape(); + auto ind = row["text"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); ASSERT_OK(iter->GetNextRow(&row)); + i++; } - EXPECT_EQ(i, 5); + EXPECT_EQ(i, 1); // Manually terminate the pipeline iter->Stop(); } -TEST_F(MindDataTestPipeline, TestRandomApplyFail1) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail1 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); - EXPECT_NE(ds, nullptr); - - // Resize: Non-positive size value: -1 at element: 0 - // RandomApply: transform ops must not be null - auto decode_op = vision::Decode(); - auto resize_op = vision::Resize({-1}); - auto random_apply = transforms::RandomApply({decode_op, resize_op}); - - // Create a Map operation on ds - ds = ds->Map({random_apply}, {"image"}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (transform ops must not be null) - 
EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestRandomApplyFail2) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail2 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); - EXPECT_NE(ds, nullptr); - - // RandomApply: transform ops must not be null - std::shared_ptr decode_op = std::make_shared(); - std::shared_ptr random_apply(new transforms::RandomApply({decode_op, nullptr})); - - // Create a Map operation on ds - ds = ds->Map({random_apply}, {"image"}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (transform ops must not be null) - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestRandomApplyFail3) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail3 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); - EXPECT_NE(ds, nullptr); - - // RandomApply: Probability has to be between 0 and 1 - auto resize_op = vision::Resize({100}); - auto random_apply = transforms::RandomApply({resize_op}, -1); - - // Create a Map operation on ds - ds = ds->Map({random_apply}, {"image"}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (Probability has to be between 0 and 1) - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestRandomApplyFail4) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplyFail4 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); - EXPECT_NE(ds, nullptr); 
- - // RandomApply: transform list must not be empty - std::vector> list = {}; - auto random_apply = transforms::RandomApply(list); +TEST_F(MindDataTestPipeline, TestSliceSuccess5) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess5."; + // Test Slice int on multi-dimension. - // Create a Map operation on ds - ds = ds->Map({random_apply}, {"image"}); + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {2, 2})); + std::shared_ptr ds = RandomData(4, schema); EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (transform list must not be empty) - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestRandomChoiceSuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceSuccess."; - - // Create an ImageFolder Dataset - std::string folder_path = datasets_root_path_ + "/testPK/data/"; - std::shared_ptr ds = ImageFolder(folder_path, true, std::make_shared(false, 3)); + ds = ds->SetNumWorkers(2); EXPECT_NE(ds, nullptr); - // Create objects for the tensor ops - std::shared_ptr resize_op1(new vision::Resize({777, 777})); - std::shared_ptr resize_op2(new vision::Resize({888, 888})); - auto random_choice = transforms::RandomChoice({resize_op1, resize_op2}); + // Apply Slice op on ds, get the first and third elements in each row. 
+ std::vector indices1 = {0, 1}; + SliceOption slice_option1 = SliceOption(indices1); + std::vector indices2 = {-1}; + SliceOption slice_option2 = SliceOption(indices2); - // Create a Map operation on ds - ds = ds->Map({random_choice}, {"image"}); + transforms::Slice slice = transforms::Slice({slice_option1, slice_option2}); + ds = ds->Map({slice}, {"col1"}); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -1081,103 +2263,80 @@ TEST_F(MindDataTestPipeline, TestRandomChoiceSuccess) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); + std::vector> expected = {{31354, 31354}, {-5655, -5655}, {-17734, -17734}, {-17220, -17220}}; + + // Check slice results uint64_t i = 0; while (row.size() != 0) { - i++; - auto image = row["image"]; - auto label = row["label"]; - MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); - MS_LOG(INFO) << "Label shape: " << label.Shape(); + auto ind = row["col1"]; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], TensorShape({1, 2}), &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); ASSERT_OK(iter->GetNextRow(&row)); + i++; } - EXPECT_EQ(i, 3); + EXPECT_EQ(i, 4); // Manually terminate the pipeline iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestRandomChoiceFail1) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceFail1 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - RandomSampler sampler = RandomSampler(false, 10); - std::shared_ptr ds = Cifar10(folder_path, "all", sampler); - EXPECT_NE(ds, nullptr); - - // Resize: Non-positive size value: -1 at element: 0 - // RandomChoice: transform ops must not be null - auto decode_op = vision::Decode(); - auto resize_op = vision::Resize({-1}); - 
auto random_choice = transforms::RandomChoice({decode_op, resize_op}); +TEST_F(MindDataTestPipeline, TestSliceFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceFail."; + // Test Slice with index out of bounds. - // Create a Map operation on ds - ds = ds->Map({random_choice}, {"image"}); + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {2})); + std::shared_ptr ds = RandomData(1, schema); EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (transform ops must not be null) - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestRandomChoiceFail2) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceFail2 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); + ds = ds->SetNumWorkers(1); EXPECT_NE(ds, nullptr); - // RandomChoice: transform ops must not be null - std::shared_ptr decode_op = std::make_shared(); - std::shared_ptr random_choice(new transforms::RandomApply({decode_op, nullptr})); - - // Create a Map operation on ds - ds = ds->Map({random_choice}, {"image"}); + // Apply Slice op on ds, get the first and third elements in each row. + std::vector indices = {0, 2}; // index 2 is out of bounds + SliceOption slice_option = SliceOption(indices); + transforms::Slice slice = transforms::Slice({slice_option}); + ds = ds->Map({slice}, {"col1"}); EXPECT_NE(ds, nullptr); + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (transform ops must not be null) - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestRandomChoiceFail3) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomChoiceFail3 with invalid transform."; - - // Create a Cifar10 Dataset - std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; - std::shared_ptr ds = Cifar10(folder_path, "all", std::make_shared(false, 10)); - EXPECT_NE(ds, nullptr); - - // RandomChoice: transform list must not be empty - std::vector> list = {}; - auto random_choice = transforms::RandomChoice(list); + EXPECT_NE(iter, nullptr); - // Create a Map operation on ds - ds = ds->Map({random_choice}, {"image"}); - EXPECT_NE(ds, nullptr); + // Iterate the dataset and get each row + std::unordered_map row; + // Expect failure: the index 2 is out of the bounds + EXPECT_ERROR(iter->GetNextRow(&row)); - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure: invalid RandomApply parameter (transform list must not be empty) - EXPECT_EQ(iter, nullptr); + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestSliceSuccess) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess."; +TEST_F(MindDataTestPipeline, TestSliceFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceFail2."; + // Test Slice with false as input SliceOption only (no other index nor slice list provided) // Create a RandomDataset u_int32_t curr_seed = GlobalContext::config_manager()->seed(); GlobalContext::config_manager()->set_seed(246); std::shared_ptr schema = Schema(); ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1})); - std::shared_ptr ds = RandomData(4, schema); + std::shared_ptr ds = RandomData(1, schema); EXPECT_NE(ds, nullptr); - ds = ds->SetNumWorkers(2); + ds = ds->SetNumWorkers(1); EXPECT_NE(ds, nullptr); // 
Create concatenate op - std::vector prepend_vector = {1, 2, 3}; + std::vector prepend_vector = {1, 2, 3}; std::shared_ptr prepend_tensor; ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor)); mindspore::MSTensor prepend_MSTensor = @@ -1190,7 +2349,7 @@ TEST_F(MindDataTestPipeline, TestSliceSuccess) { EXPECT_NE(ds, nullptr); // Apply Slice op on ds, get the first and third elements in each row. - SliceOption slice_option = SliceOption(Slice(0, 3, 2)); + SliceOption slice_option = SliceOption(false); transforms::Slice slice = transforms::Slice({slice_option}); ds = ds->Map({slice}, {"col1"}); EXPECT_NE(ds, nullptr); @@ -1202,26 +2361,9 @@ TEST_F(MindDataTestPipeline, TestSliceSuccess) { // Iterate the dataset and get each row std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector> expected = {{1, 3}, {1, 3}, {1, 3}, {1, 3}}; - - // Check slice results - uint64_t i = 0; - while (row.size() != 0) { - auto ind = row["col1"]; - std::shared_ptr de_expected_tensor; - ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); - mindspore::MSTensor expected_tensor = - mindspore::MSTensor(std::make_shared(de_expected_tensor)); - EXPECT_MSTENSOR_EQ(ind, expected_tensor); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } + // Expect failure: SliceOption is false and no other index nor slice list provided + EXPECT_ERROR(iter->GetNextRow(&row)); - EXPECT_EQ(i, 4); - - // Manually terminate the pipeline iter->Stop(); GlobalContext::config_manager()->set_seed(curr_seed); } @@ -1285,7 +2427,8 @@ TEST_F(MindDataTestPipeline, TestTypeCastFail) { EXPECT_NE(ds, nullptr); // incorrect data type - std::shared_ptr type_cast = std::make_shared(mindspore::DataType::kTypeUnknown); + std::shared_ptr type_cast = + std::make_shared(mindspore::DataType::kTypeUnknown); // Create a Map operation on ds ds = ds->Map({type_cast}, {"image", "label"}); diff --git a/tests/ut/python/dataset/test_mask_op.py b/tests/ut/python/dataset/test_mask_op.py index 
4ee2f7ab1e..9c93ca3eb0 100644 --- a/tests/ut/python/dataset/test_mask_op.py +++ b/tests/ut/python/dataset/test_mask_op.py @@ -121,7 +121,7 @@ def test_mask_exceptions_str(): with pytest.raises(RuntimeError) as info: mask_compare(["1", "2", "3", "4", "5"], ops.Relational.EQ, "3.5", mstype.string) - assert "only support numeric datatype of input." in str(info.value) + assert "Only supports bool or numeric datatype for generated mask type." in str(info.value) if __name__ == "__main__":