diff --git a/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt index d2310b598d..6207f0d2a7 100644 --- a/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt @@ -15,7 +15,6 @@ if(ENABLE_PYTHON) python/bindings/dataset/engine/ir/execute/bindings.cc python/bindings/dataset/engine/ir/schema/bindings.cc python/bindings/dataset/kernels/bindings.cc - python/bindings/dataset/kernels/data/bindings.cc python/bindings/dataset/kernels/ir/bindings.cc python/bindings/dataset/kernels/ir/image/bindings.cc python/bindings/dataset/text/bindings.cc diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc deleted file mode 100644 index 7b3f6d33d3..0000000000 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" -#include "pybind11/stl_bind.h" - -#include "minddata/dataset/api/python/pybind_register.h" -#include "minddata/dataset/kernels/data/fill_op.h" -#include "minddata/dataset/kernels/data/to_float16_op.h" - -namespace mindspore { -namespace dataset { - -PYBIND_REGISTER( - FillOp, 1, ([](const py::module *m) { - (void)py::class_>(*m, "FillOp").def(py::init>()); - })); - -PYBIND_REGISTER(ToFloat16Op, 1, ([](const py::module *m) { - (void)py::class_>(*m, "ToFloat16Op", - py::dynamic_attr()) - .def(py::init<>()); - })); - -} // namespace dataset -} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc index be8b28c842..abd157540b 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc @@ -86,6 +86,17 @@ PYBIND_REGISTER( })); })); +PYBIND_REGISTER(FillOperation, 1, ([](const py::module *m) { + (void) + py::class_>( + *m, "FillOperation") + .def(py::init([](std::shared_ptr fill_value) { + auto fill = std::make_shared(fill_value); + THROW_IF_ERROR(fill->ValidateParams()); + return fill; + })); + })); + PYBIND_REGISTER(MaskOperation, 1, ([](const py::module *m) { (void) py::class_>( diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc index c9670e7125..2b76f9eed6 100644 --- a/mindspore/ccsrc/minddata/dataset/api/transforms.cc +++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc @@ -85,6 +85,26 @@ Duplicate::Duplicate() {} std::shared_ptr Duplicate::Parse() { return std::make_shared(); } #ifndef ENABLE_ANDROID +// Constructor to Fill +struct Fill::Data { + explicit Data(MSTensor fill_value) : fill_value_(fill_value) {} + MSTensor fill_value_; +}; + +Fill::Fill(MSTensor 
fill_value) : data_(std::make_shared(fill_value)) {} + +std::shared_ptr Fill::Parse() { + // Convert the user-facing MSTensor into a dataset Tensor. If the conversion fails, out_fill_value may + // still be null, so report the error here instead of handing a null tensor to FillOperation. + std::shared_ptr out_fill_value; + Status rc = Tensor::CreateFromMSTensor(data_->fill_value_, &out_fill_value); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fill: failed to convert fill_value to a dataset tensor. " << rc; + return nullptr; + } + return std::make_shared(out_fill_value); +} + // Constructor to Mask struct Mask::Data { explicit Data(RelationalOp op, MSTensor constant, mindspore::DataType ms_type) diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h index c41a687e3d..5182c65d1d 100644 --- a/mindspore/ccsrc/minddata/dataset/include/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -194,6 +194,30 @@ class Duplicate final : public TensorTransform { std::shared_ptr Parse() override; }; +/// \brief Fill Op. +/// \notes Tensor operation to fill all elements in the tensor with the specified value. +/// The output tensor will have the same shape and type as the input tensor. +class Fill final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] fill_value Scalar value to fill the tensor with. + /// Can only be MSTensor of the following types from mindspore::DataType: + /// String, Bool, Int8/16/32/64, UInt8/16/32/64, Float16/32/64. + explicit Fill(MSTensor fill_value); + + /// \brief Destructor + ~Fill() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object, or nullptr if fill_value cannot be converted. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + /// \brief Mask Op. /// \notes Mask content of the input tensor with the given predicate. /// Any element of the tensor that matches the predicate will be evaluated to True, otherwise False. 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc index 952d89d8f0..6ee48ea306 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc @@ -26,6 +26,7 @@ #endif #include "minddata/dataset/kernels/data/duplicate_op.h" #ifndef ENABLE_ANDROID +#include "minddata/dataset/kernels/data/fill_op.h" #include "minddata/dataset/kernels/data/mask_op.h" #endif #include "minddata/dataset/kernels/data/one_hot_op.h" @@ -111,6 +112,36 @@ Status DuplicateOperation::ValidateParams() { return Status::OK(); } std::shared_ptr DuplicateOperation::Build() { return std::make_shared(); } #ifndef ENABLE_ANDROID + +// FillOperation +FillOperation::FillOperation(std::shared_ptr fill_value) : fill_value_(fill_value) {} + +Status FillOperation::ValidateParams() { + // Reject a missing fill_value before it is dereferenced by the scalar-shape check below. + if (fill_value_ == nullptr) { + std::string err_msg = "Fill: fill_value is null."; + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + + if (fill_value_->shape() != TensorShape::CreateScalar()) { + std::string err_msg = "Fill: fill_value is not a scalar tensor."; + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + + return Status::OK(); +} + +std::shared_ptr FillOperation::Build() { return std::make_shared(fill_value_); } + +Status FillOperation::to_json(nlohmann::json *out_json) { + nlohmann::json args; + args["fill_value"] = fill_value_->ToString(); + *out_json = args; + return Status::OK(); +} + // MaskOperation MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr &constant, DataType dtype) : op_(op), constant_(constant), dtype_(dtype) {} diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h index 45655c0c96..37c22ee046 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h @@ -31,6 +31,7 @@ namespace dataset { constexpr char kComposeOperation[] = "Compose"; 
constexpr char kConcatenateOperation[] = "Concatenate"; constexpr char kDuplicateOperation[] = "Duplicate"; +constexpr char kFillOperation[] = "Fill"; constexpr char kMaskOperation[] = "Mask"; constexpr char kOneHotOperation[] = "OneHot"; constexpr char kPadEndOperation[] = "PadEnd"; @@ -93,6 +94,24 @@ class DuplicateOperation : public TensorOperation { std::string Name() const override { return kDuplicateOperation; } }; +class FillOperation : public TensorOperation { + public: + explicit FillOperation(std::shared_ptr fill_value); + + ~FillOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kFillOperation; } + + Status to_json(nlohmann::json *out_json) override; + + private: + std::shared_ptr fill_value_; +}; + class MaskOperation : public TensorOperation { public: explicit MaskOperation(RelationalOp op, const std::shared_ptr &constant, DataType dtype); diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 8de8ef3b29..4b221c6014 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -77,7 +77,7 @@ class OneHot(TensorOperation): return cde.OneHotOperation(self.num_classes) -class Fill(cde.FillOp): +class Fill(TensorOperation): """ Tensor operation to fill all elements in the tensor with the specified value. The output tensor will have the same shape and type as the input tensor. 
@@ -101,7 +101,10 @@ class Fill(cde.FillOp): @check_fill_value def __init__(self, fill_value): - super().__init__(cde.Tensor(np.array(fill_value))) + self.fill_value = cde.Tensor(np.array(fill_value)) + + def parse(self): + return cde.FillOperation(self.fill_value) class TypeCast(TensorOperation): diff --git a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc index 2bd7981a7f..237df886bd 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc @@ -475,6 +475,82 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic7) { GlobalContext::config_manager()->set_seed(curr_seed); } +TEST_F(MindDataTestPipeline, TestRandomDatasetUInt8) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetUInt8."; + + // Create a RandomDataset with UInt8 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4}); + std::shared_ptr ds = RandomData(3, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(3); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestRandomDatasetFloat) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetFloat."; + + // Create a RandomDataset with Float numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(369); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeFloat16, {2, 3}); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetDuplicateColumnName."; diff --git a/tests/ut/cpp/dataset/c_api_transforms_test.cc b/tests/ut/cpp/dataset/c_api_transforms_test.cc index d45ed50bd4..3366fa7396 100644 --- a/tests/ut/cpp/dataset/c_api_transforms_test.cc +++ b/tests/ut/cpp/dataset/c_api_transforms_test.cc @@ -241,6 +241,434 @@ TEST_F(MindDataTestPipeline, TestDuplicateSuccess) { iter->Stop(); } +TEST_F(MindDataTestPipeline, TestFillSuccessInt) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessInt."; + + // Create a RandomDataset with Int32 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(864); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {6}); + std::shared_ptr ds = RandomData(5, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(3); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with 3 + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(3, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + std::vector> expected = { + {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 5); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessBool) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessBool."; + + // Create a RandomDataset with bool values for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeBool, {4}); + std::shared_ptr ds = RandomData(3, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with true + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar((bool)true, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + std::vector> expected = { + {true, true, true, true}, {true, true, true, true}, {true, true, true, true}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast."; + + // Create a RandomDataset with UInt8 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4}); + std::shared_ptr ds = RandomData(3, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with -3 + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + // Note: 2**8 -3 = 256 -3 = 253 + std::vector> expected = {{253, 253, 253, 253}, {253, 253, 253, 253}, {253, 253, 253, 253}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecastZero) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecastZero."; + + // Create a RandomDataset with UInt8 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4}); + std::shared_ptr ds = RandomData(3, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with zero + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + // Note: 2**8 = 256 + std::vector> expected = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast16) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast16."; + + // Create a RandomDataset with UInt16 numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeUInt16, {4}); + std::shared_ptr ds = RandomData(3, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with -3 + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + // Note: 2**16 -3 = 65536 -3 = 65533 + std::vector> expected = { + {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessUpTypecast) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessUpTypecast."; + + // Create a RandomDataset with Float numbers for given shape + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(963); + std::shared_ptr schema = Schema(); + schema->add_column("col1", mindspore::DataType::kNumberTypeFloat32, {2}); + std::shared_ptr ds = RandomData((float)4.0, schema); + EXPECT_NE(ds, nullptr); + ds = ds->SetNumWorkers(2); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with zeroes + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"col1"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + std::vector> expected = {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}; + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["col1"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + +TEST_F(MindDataTestPipeline, TestFillSuccessString) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessString."; + + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create Skip operation on ds + ds = ds->Skip(6); + EXPECT_NE(ds, nullptr); + + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create Fill op - to fill with string + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateScalar("Hello", &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + iter->GetNextRow(&row); + + std::vector expected = {"Hello", "Hello", "Hello", "Hello", "Hello"}; + std::shared_ptr de_expected_tensor; + ASSERT_OK(Tensor::CreateFromVector(expected, &de_expected_tensor)); + mindspore::MSTensor expected_tensor = + mindspore::MSTensor(std::make_shared(de_expected_tensor)); + + uint64_t i = 0; + while (row.size() != 0) { + auto ind = row["text"]; + TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); + EXPECT_MSTENSOR_EQ(ind, expected_tensor); + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestFillFailFillValueNotScalar) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillFailFillValueNotScalar."; + // Test Fill with a non-scalar fill_value (a 1-D tensor holding one string) + + // Create a TextFile dataset + std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt"; + std::shared_ptr ds = TextFile({data_file}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create Skip operation on ds + ds = ds->Skip(6); + EXPECT_NE(ds, nullptr); + + // Create BasicTokenizer operation on ds + std::shared_ptr basic_tokenizer = std::make_shared(true); + EXPECT_NE(basic_tokenizer, nullptr); + + // Create Map operation on ds + ds = ds->Map({basic_tokenizer}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create Fill op - with wrongful vector shape instead of scalar + std::vector fill_string = {"ERROR"}; + std::shared_ptr fill_value_tensor; + ASSERT_OK(Tensor::CreateFromVector(fill_string, &fill_value_tensor)); + mindspore::MSTensor fill_value_MSTensor = + mindspore::MSTensor(std::make_shared(fill_value_tensor)); + transforms::Fill mask = transforms::Fill(fill_value_MSTensor); + ds = ds->Map({mask}, {"text"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will 
trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + + // Expect failure: invalid Fill parameter (the shape of fill_value is not a scalar) + EXPECT_EQ(iter, nullptr); +} + TEST_F(MindDataTestPipeline, TestMaskSuccess) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess."; @@ -866,4 +1294,4 @@ TEST_F(MindDataTestPipeline, TestTypeCastFail) { std::shared_ptr iter = ds->CreateIterator(); // Expect failure: invalid TypeCast input EXPECT_EQ(iter, nullptr); -} \ No newline at end of file +} diff --git a/tests/ut/python/dataset/test_fill_op.py b/tests/ut/python/dataset/test_fill_op.py index e8e6dcf696..a6df373c95 100644 --- a/tests/ut/python/dataset/test_fill_op.py +++ b/tests/ut/python/dataset/test_fill_op.py @@ -73,6 +73,19 @@ def test_fillop_string(): np.testing.assert_array_equal(data_row[0], expected) +def test_fillop_bytes(): + def gen(): + yield (np.array(["A", "B", "C"], dtype='S'),) + + data = ds.GeneratorDataset(gen, column_names=["col"]) + fill_op = data_trans.Fill(b'abc') + + data = data.map(operations=fill_op, input_columns=["col"]) + expected = np.array([b'abc', b'abc', b'abc'], dtype='S') + for data_row in data.create_tuple_iterator(output_numpy=True): + np.testing.assert_array_equal(data_row[0], expected) + + def test_fillop_error_handling(): def gen(): yield (np.array([4, 4, 4, 4]),) @@ -92,4 +105,5 @@ if __name__ == "__main__": test_fillop_up_type_cast() test_fillop_down_type_cast() test_fillop_string() + test_fillop_bytes() test_fillop_error_handling() diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py index e68065b6be..ef69671d25 100644 --- a/tests/ut/python/dataset/test_serdes_dataset.py +++ b/tests/ut/python/dataset/test_serdes_dataset.py @@ -145,90 +145,6 @@ def test_serdes_mnist_dataset(remove_json_files=True): delete_json_files() -def test_serdes_zip_dataset(remove_json_files=True): - """ - Test serdes on zip dataset pipeline. 
- """ - files = ["../data/dataset/testTFTestAllTypes/test.data"] - schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema.json" - ds.config.set_seed(1) - - ds0 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL) - data1 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL) - data2 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.FILES) - data2 = data2.shuffle(10000) - data2 = data2.rename(input_columns=["col_sint16", "col_sint32", "col_sint64", "col_float", - "col_1d", "col_2d", "col_3d", "col_binary"], - output_columns=["column_sint16", "column_sint32", "column_sint64", "column_float", - "column_1d", "column_2d", "column_3d", "column_binary"]) - data3 = ds.zip((data1, data2)) - ds.serialize(data3, "zip_dataset_pipeline.json") - assert validate_jsonfile("zip_dataset_pipeline.json") is True - assert validate_jsonfile("zip_dataset_pipeline_typo.json") is False - - data4 = ds.deserialize(json_filepath="zip_dataset_pipeline.json") - ds.serialize(data4, "zip_dataset_pipeline_1.json") - assert validate_jsonfile("zip_dataset_pipeline_1.json") is True - assert filecmp.cmp('zip_dataset_pipeline.json', 'zip_dataset_pipeline_1.json') - - rows = 0 - for d0, d3, d4 in zip(ds0.create_tuple_iterator(output_numpy=True), data3.create_tuple_iterator(output_numpy=True), - data4.create_tuple_iterator(output_numpy=True)): - num_cols = len(d0) - offset = 0 - for t1 in d0: - np.testing.assert_array_equal(t1, d3[offset]) - np.testing.assert_array_equal(t1, d3[offset + num_cols]) - np.testing.assert_array_equal(t1, d4[offset]) - np.testing.assert_array_equal(t1, d4[offset + num_cols]) - offset += 1 - rows += 1 - assert rows == 12 - - if remove_json_files: - delete_json_files() - - -def test_serdes_random_crop(): - """ - Test serdes on RandomCrop pipeline. 
- """ - logger.info("test_random_crop") - DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] - SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" - original_seed = config_get_set_seed(1) - original_num_parallel_workers = config_get_set_num_parallel_workers(1) - - # First dataset - data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) - decode_op = vision.Decode() - random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200]) - data1 = data1.map(operations=decode_op, input_columns="image") - data1 = data1.map(operations=random_crop_op, input_columns="image") - - # Serializing into python dictionary - ds1_dict = ds.serialize(data1) - # Serializing into json object - _ = json.dumps(ds1_dict, indent=2) - - # Reconstruct dataset pipeline from its serialized form - data1_1 = ds.deserialize(input_dict=ds1_dict) - - # Second dataset - data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) - data2 = data2.map(operations=decode_op, input_columns="image") - - for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), - data1_1.create_dict_iterator(num_epochs=1, output_numpy=True), - data2.create_dict_iterator(num_epochs=1, output_numpy=True)): - np.testing.assert_array_equal(item1['image'], item1_1['image']) - _ = item2["image"] - - # Restore configuration num_parallel_workers - ds.config.set_seed(original_seed) - ds.config.set_num_parallel_workers(original_num_parallel_workers) - - def test_serdes_cifar10_dataset(remove_json_files=True): """ Test serdes on Cifar10 dataset pipeline @@ -351,6 +267,90 @@ def test_serdes_voc_dataset(remove_json_files=True): ds.config.set_num_parallel_workers(original_num_parallel_workers) +def test_serdes_zip_dataset(remove_json_files=True): + """ + Test serdes on zip dataset pipeline. 
+ """ + files = ["../data/dataset/testTFTestAllTypes/test.data"] + schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema.json" + ds.config.set_seed(1) + + ds0 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL) + data1 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL) + data2 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.FILES) + data2 = data2.shuffle(10000) + data2 = data2.rename(input_columns=["col_sint16", "col_sint32", "col_sint64", "col_float", + "col_1d", "col_2d", "col_3d", "col_binary"], + output_columns=["column_sint16", "column_sint32", "column_sint64", "column_float", + "column_1d", "column_2d", "column_3d", "column_binary"]) + data3 = ds.zip((data1, data2)) + ds.serialize(data3, "zip_dataset_pipeline.json") + assert validate_jsonfile("zip_dataset_pipeline.json") is True + assert validate_jsonfile("zip_dataset_pipeline_typo.json") is False + + data4 = ds.deserialize(json_filepath="zip_dataset_pipeline.json") + ds.serialize(data4, "zip_dataset_pipeline_1.json") + assert validate_jsonfile("zip_dataset_pipeline_1.json") is True + assert filecmp.cmp('zip_dataset_pipeline.json', 'zip_dataset_pipeline_1.json') + + rows = 0 + for d0, d3, d4 in zip(ds0.create_tuple_iterator(output_numpy=True), data3.create_tuple_iterator(output_numpy=True), + data4.create_tuple_iterator(output_numpy=True)): + num_cols = len(d0) + offset = 0 + for t1 in d0: + np.testing.assert_array_equal(t1, d3[offset]) + np.testing.assert_array_equal(t1, d3[offset + num_cols]) + np.testing.assert_array_equal(t1, d4[offset]) + np.testing.assert_array_equal(t1, d4[offset + num_cols]) + offset += 1 + rows += 1 + assert rows == 12 + + if remove_json_files: + delete_json_files() + + +def test_serdes_random_crop(): + """ + Test serdes on RandomCrop pipeline. 
+ """ + logger.info("test_random_crop") + DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] + SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" + original_seed = config_get_set_seed(1) + original_num_parallel_workers = config_get_set_num_parallel_workers(1) + + # First dataset + data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) + decode_op = vision.Decode() + random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200]) + data1 = data1.map(operations=decode_op, input_columns="image") + data1 = data1.map(operations=random_crop_op, input_columns="image") + + # Serializing into python dictionary + ds1_dict = ds.serialize(data1) + # Serializing into json object + _ = json.dumps(ds1_dict, indent=2) + + # Reconstruct dataset pipeline from its serialized form + data1_1 = ds.deserialize(input_dict=ds1_dict) + + # Second dataset + data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) + data2 = data2.map(operations=decode_op, input_columns="image") + + for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data1_1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): + np.testing.assert_array_equal(item1['image'], item1_1['image']) + _ = item2["image"] + + # Restore configuration num_parallel_workers + ds.config.set_seed(original_seed) + ds.config.set_num_parallel_workers(original_num_parallel_workers) + + def test_serdes_to_device(remove_json_files=True): """ Test serdes on transfer dataset pipeline. @@ -405,6 +405,25 @@ def test_serdes_uniform_augment(remove_json_files=True): util_check_serialize_deserialize_file(data, "uniform_augment_pipeline", remove_json_files) +def skip_test_serdes_fill(remove_json_files=True): + """ + Test serdes on Fill data transform. 
+ """ + def gen(): + yield (np.array([4, 5, 6, 7], dtype=np.int32),) + + data = ds.GeneratorDataset(gen, column_names=["col"]) + fill_op = c.Fill(3) + + data = data.map(operations=fill_op, input_columns=["col"]) + expected = np.array([3, 3, 3, 3], dtype=np.int32) + for data_row in data: + np.testing.assert_array_equal(data_row[0].asnumpy(), expected) + + # FIXME - need proper serdes support for Fill's fill_value parameter + util_check_serialize_deserialize_file(data, "fill_pipeline", remove_json_files) + + def test_serdes_exception(): """ Test exception case in serdes @@ -465,7 +484,7 @@ def delete_json_files(): # Test save load minddataset -def skip_test_minddataset(add_and_remove_cv_file): +def skip_test_minddataset(add_and_remove_cv_file=True): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -504,4 +523,9 @@ if __name__ == '__main__': test_serdes_voc_dataset() test_serdes_zip_dataset() test_serdes_random_crop() + test_serdes_to_device() + test_serdes_pyvision() + test_serdes_uniform_augment() + skip_test_serdes_fill() test_serdes_exception() + skip_test_minddataset()