Browse Source

dataset: Fill op: C++ API support, UTs and Pybind decoupling

pull/14911/head
Cathy Wong 5 years ago
parent
commit
7e6a03487e
12 changed files with 725 additions and 128 deletions
  1. +0
    -1
      mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt
  2. +0
    -39
      mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc
  3. +11
    -0
      mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc
  4. +14
    -0
      mindspore/ccsrc/minddata/dataset/api/transforms.cc
  5. +24
    -0
      mindspore/ccsrc/minddata/dataset/include/transforms.h
  6. +24
    -0
      mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
  7. +19
    -0
      mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
  8. +5
    -2
      mindspore/dataset/transforms/c_transforms.py
  9. +76
    -0
      tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc
  10. +429
    -1
      tests/ut/cpp/dataset/c_api_transforms_test.cc
  11. +14
    -0
      tests/ut/python/dataset/test_fill_op.py
  12. +109
    -85
      tests/ut/python/dataset/test_serdes_dataset.py

+ 0
- 1
mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt View File

@@ -15,7 +15,6 @@ if(ENABLE_PYTHON)
python/bindings/dataset/engine/ir/execute/bindings.cc
python/bindings/dataset/engine/ir/schema/bindings.cc
python/bindings/dataset/kernels/bindings.cc
python/bindings/dataset/kernels/data/bindings.cc
python/bindings/dataset/kernels/ir/bindings.cc
python/bindings/dataset/kernels/ir/image/bindings.cc
python/bindings/dataset/text/bindings.cc


+ 0
- 39
mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc View File

@@ -1,39 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include "pybind11/stl_bind.h"

#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/kernels/data/fill_op.h"
#include "minddata/dataset/kernels/data/to_float16_op.h"

namespace mindspore {
namespace dataset {

PYBIND_REGISTER(
FillOp, 1, ([](const py::module *m) {
(void)py::class_<FillOp, TensorOp, std::shared_ptr<FillOp>>(*m, "FillOp").def(py::init<std::shared_ptr<Tensor>>());
}));

PYBIND_REGISTER(ToFloat16Op, 1, ([](const py::module *m) {
(void)py::class_<ToFloat16Op, TensorOp, std::shared_ptr<ToFloat16Op>>(*m, "ToFloat16Op",
py::dynamic_attr())
.def(py::init<>());
}));

} // namespace dataset
} // namespace mindspore

+ 11
- 0
mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc View File

@@ -86,6 +86,17 @@ PYBIND_REGISTER(
}));
}));

// Python binding for transforms::FillOperation. ValidateParams() is invoked
// eagerly in the factory so an invalid fill_value (e.g. non-scalar) surfaces
// as a Python exception at construction time rather than at pipeline build.
PYBIND_REGISTER(FillOperation, 1, ([](const py::module *m) {
                  (void)
                    py::class_<transforms::FillOperation, TensorOperation, std::shared_ptr<transforms::FillOperation>>(
                      *m, "FillOperation")
                      .def(py::init([](std::shared_ptr<Tensor> fill_value) {
                        auto fill = std::make_shared<transforms::FillOperation>(fill_value);
                        THROW_IF_ERROR(fill->ValidateParams());
                        return fill;
                      }));
                }));

PYBIND_REGISTER(MaskOperation, 1, ([](const py::module *m) {
(void)
py::class_<transforms::MaskOperation, TensorOperation, std::shared_ptr<transforms::MaskOperation>>(


+ 14
- 0
mindspore/ccsrc/minddata/dataset/api/transforms.cc View File

@@ -85,6 +85,20 @@ Duplicate::Duplicate() {}
std::shared_ptr<TensorOperation> Duplicate::Parse() { return std::make_shared<DuplicateOperation>(); }

#ifndef ENABLE_ANDROID
// Constructor to Fill
struct Fill::Data {
explicit Data(MSTensor fill_value) : fill_value_(fill_value) {}
MSTensor fill_value_;
};

Fill::Fill(MSTensor fill_value) : data_(std::make_shared<Data>(fill_value)) {}

std::shared_ptr<TensorOperation> Fill::Parse() {
std::shared_ptr<Tensor> out_fill_value;
Tensor::CreateFromMSTensor(data_->fill_value_, &out_fill_value);
return std::make_shared<FillOperation>(out_fill_value);
}

// Constructor to Mask
struct Mask::Data {
explicit Data(RelationalOp op, MSTensor constant, mindspore::DataType ms_type)


+ 24
- 0
mindspore/ccsrc/minddata/dataset/include/transforms.h View File

@@ -194,6 +194,30 @@ class Duplicate final : public TensorTransform {
std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Fill Op.
/// \notes Tensor operation to fill all elements in the tensor with the specified value.
///     The output tensor will have the same shape and type as the input tensor.
class Fill final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] fill_value Scalar value to fill the tensor with.
  ///     Can only be MSTensor of the following types from mindspore::DataType:
  ///     String, Bool, Int8/16/32/64, UInt8/16/32/64, Float16/32/64.
  explicit Fill(MSTensor fill_value);

  /// \brief Destructor
  ~Fill() = default;

 protected:
  /// \brief Function to convert TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // PImpl-style holder for the fill value (Data is defined in transforms.cc).
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Mask Op.
/// \notes Mask content of the input tensor with the given predicate.
/// Any element of the tensor that matches the predicate will be evaluated to True, otherwise False.


+ 24
- 0
mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc View File

@@ -26,6 +26,7 @@
#endif
#include "minddata/dataset/kernels/data/duplicate_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/fill_op.h"
#include "minddata/dataset/kernels/data/mask_op.h"
#endif
#include "minddata/dataset/kernels/data/one_hot_op.h"
@@ -111,6 +112,29 @@ Status DuplicateOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> DuplicateOperation::Build() { return std::make_shared<DuplicateOp>(); }

#ifndef ENABLE_ANDROID

// FillOperation
FillOperation::FillOperation(std::shared_ptr<Tensor> fill_value) : fill_value_(std::move(fill_value)) {}

/// \brief Validate that fill_value_ is a non-null scalar tensor.
Status FillOperation::ValidateParams() {
  // Guard against a null tensor (e.g. a failed upstream conversion) before
  // dereferencing it for the scalar-shape check.
  if (fill_value_ == nullptr) {
    std::string err_msg = "Fill: fill_value is null.";
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }
  if (fill_value_->shape() != TensorShape::CreateScalar()) {
    std::string err_msg = "Fill: fill_value is not a scalar tensor.";
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }

  return Status::OK();
}

std::shared_ptr<TensorOp> FillOperation::Build() { return std::make_shared<FillOp>(fill_value_); }

/// \brief Serialize the fill value for pipeline serdes.
Status FillOperation::to_json(nlohmann::json *out_json) {
  nlohmann::json args;
  args["fill_value"] = fill_value_->ToString();
  *out_json = args;
  return Status::OK();
}

// MaskOperation
MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype)
: op_(op), constant_(constant), dtype_(dtype) {}


+ 19
- 0
mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h View File

@@ -31,6 +31,7 @@ namespace dataset {
constexpr char kComposeOperation[] = "Compose";
constexpr char kConcatenateOperation[] = "Concatenate";
constexpr char kDuplicateOperation[] = "Duplicate";
constexpr char kFillOperation[] = "Fill";
constexpr char kMaskOperation[] = "Mask";
constexpr char kOneHotOperation[] = "OneHot";
constexpr char kPadEndOperation[] = "PadEnd";
@@ -93,6 +94,24 @@ class DuplicateOperation : public TensorOperation {
std::string Name() const override { return kDuplicateOperation; }
};

// IR node for the Fill transform: stores the scalar fill value and builds
// the runtime FillOp.
class FillOperation : public TensorOperation {
 public:
  /// \param[in] fill_value Scalar tensor holding the value to fill with
  ///     (checked by ValidateParams).
  explicit FillOperation(std::shared_ptr<Tensor> fill_value);

  ~FillOperation() = default;

  /// \brief Build the runtime FillOp from this IR node.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Validate that fill_value is a scalar tensor.
  Status ValidateParams() override;

  std::string Name() const override { return kFillOperation; }

  /// \brief Serialize the fill value into JSON for pipeline serdes.
  Status to_json(nlohmann::json *out_json) override;

 private:
  std::shared_ptr<Tensor> fill_value_;
};

class MaskOperation : public TensorOperation {
public:
explicit MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype);


+ 5
- 2
mindspore/dataset/transforms/c_transforms.py View File

@@ -77,7 +77,7 @@ class OneHot(TensorOperation):
return cde.OneHotOperation(self.num_classes)


class Fill(cde.FillOp):
class Fill(TensorOperation):
"""
Tensor operation to fill all elements in the tensor with the specified value.
The output tensor will have the same shape and type as the input tensor.
@@ -101,7 +101,10 @@ class Fill(cde.FillOp):

@check_fill_value
def __init__(self, fill_value):
super().__init__(cde.Tensor(np.array(fill_value)))
self.fill_value = cde.Tensor(np.array(fill_value))

def parse(self):
return cde.FillOperation(self.fill_value)


class TypeCast(TensorOperation):


+ 76
- 0
tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc View File

@@ -475,6 +475,82 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic7) {
GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestRandomDatasetUInt8) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetUInt8.";

  // Create a RandomDataset with UInt8 numbers for given shape.
  // Fixed: u_int32_t is a non-standard alias; use standard uint32_t.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();
  GlobalContext::config_manager()->set_seed(963);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(3);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and count rows; values are random so only log them.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestRandomDatasetFloat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetFloat.";

  // Create a RandomDataset with Float16 numbers for given 2D shape.
  // Fixed: u_int32_t is a non-standard alias; use standard uint32_t.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();
  GlobalContext::config_manager()->set_seed(369);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeFloat16, {2, 3});
  std::shared_ptr<Dataset> ds = RandomData(4, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(2);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and count rows; values are random so only log them.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetDuplicateColumnName.";



+ 429
- 1
tests/ut/cpp/dataset/c_api_transforms_test.cc View File

@@ -241,6 +241,434 @@ TEST_F(MindDataTestPipeline, TestDuplicateSuccess) {
iter->Stop();
}

TEST_F(MindDataTestPipeline, TestFillSuccessInt) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessInt.";

  // Create a RandomDataset with Int32 numbers for given shape.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();  // fixed non-standard u_int32_t
  GlobalContext::config_manager()->set_seed(864);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {6});
  std::shared_ptr<Dataset> ds = RandomData(5, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(3);
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to fill every element with the scalar 3.
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar(3, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"col1"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  std::vector<std::vector<int32_t>> expected = {
    {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}, {3, 3, 3, 3, 3, 3}};

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    std::shared_ptr<Tensor> de_expected_tensor;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
    mindspore::MSTensor expected_tensor =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 5);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestFillSuccessBool) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessBool.";

  // Create a RandomDataset with bool values for given shape.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();  // fixed non-standard u_int32_t
  GlobalContext::config_manager()->set_seed(963);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeBool, {4});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(2);
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to fill with true (original comment wrongly said "zero").
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar((bool)true, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"col1"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  std::vector<std::vector<bool>> expected = {
    {true, true, true, true}, {true, true, true, true}, {true, true, true, true}};

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    std::shared_ptr<Tensor> de_expected_tensor;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
    mindspore::MSTensor expected_tensor =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast.";

  // Create a RandomDataset with UInt8 numbers for given shape.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();  // fixed non-standard u_int32_t
  GlobalContext::config_manager()->set_seed(963);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(2);
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to fill with -3; downcast to uint8 wraps to 253.
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"col1"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  // Note: 2**8 - 3 = 256 - 3 = 253
  std::vector<std::vector<uint8_t>> expected = {{253, 253, 253, 253}, {253, 253, 253, 253}, {253, 253, 253, 253}};

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    std::shared_ptr<Tensor> de_expected_tensor;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
    mindspore::MSTensor expected_tensor =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecastZero) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecastZero.";

  // Create a RandomDataset with UInt8 numbers for given shape.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();  // fixed non-standard u_int32_t
  GlobalContext::config_manager()->set_seed(963);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeUInt8, {4});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(2);
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to fill with zero; 0 fits uint8 exactly, no wrap-around.
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"col1"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  std::vector<std::vector<uint8_t>> expected = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    std::shared_ptr<Tensor> de_expected_tensor;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
    mindspore::MSTensor expected_tensor =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestFillSuccessDownTypecast16) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessDownTypecast16.";

  // Create a RandomDataset with UInt16 numbers for given shape.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();  // fixed non-standard u_int32_t
  GlobalContext::config_manager()->set_seed(963);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeUInt16, {4});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(2);
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to fill with -3; downcast to uint16 wraps to 65533.
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar(-3, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"col1"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  // Note: 2**16 - 3 = 65536 - 3 = 65533
  std::vector<std::vector<uint16_t>> expected = {
    {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}, {65533, 65533, 65533, 65533}};

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    std::shared_ptr<Tensor> de_expected_tensor;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
    mindspore::MSTensor expected_tensor =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestFillSuccessUpTypecast) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessUpTypecast.";

  // Create a RandomDataset with Float numbers for given shape.
  uint32_t curr_seed = GlobalContext::config_manager()->seed();  // fixed non-standard u_int32_t
  GlobalContext::config_manager()->set_seed(963);
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::DataType::kNumberTypeFloat32, {2});
  // Fixed: the sample count was passed as (float)4.0; it is a row count and
  // must be an integer.
  std::shared_ptr<Dataset> ds = RandomData(4, schema);
  EXPECT_NE(ds, nullptr);
  ds = ds->SetNumWorkers(2);
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to fill with integer 0, upcast to float 0.0.
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar(0, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"col1"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  std::vector<std::vector<float_t>> expected = {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}};

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["col1"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    std::shared_ptr<Tensor> de_expected_tensor;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
    mindspore::MSTensor expected_tensor =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline and restore the original seed.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}

TEST_F(MindDataTestPipeline, TestFillSuccessString) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillSuccessString.";

  // Create a TextFile dataset.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Create Skip operation on ds.
  ds = ds->Skip(6);
  EXPECT_NE(ds, nullptr);

  // Create BasicTokenizer operation on ds.
  std::shared_ptr<TensorTransform> basic_tokenizer = std::make_shared<text::BasicTokenizer>(true);
  EXPECT_NE(basic_tokenizer, nullptr);

  // Create Map operation on ds.
  ds = ds->Map({basic_tokenizer}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create Fill op - to replace every token with the string "Hello".
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateScalar<std::string>("Hello", &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  iter->GetNextRow(&row);

  // The single remaining line tokenizes into 5 tokens, all filled with "Hello".
  std::vector<std::string> expected = {"Hello", "Hello", "Hello", "Hello", "Hello"};
  std::shared_ptr<Tensor> de_expected_tensor;
  ASSERT_OK(Tensor::CreateFromVector(expected, &de_expected_tensor));
  mindspore::MSTensor expected_tensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));

  uint64_t i = 0;
  while (row.size() != 0) {
    auto ind = row["text"];
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    EXPECT_MSTENSOR_EQ(ind, expected_tensor);
    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline.
  iter->Stop();
}

TEST_F(MindDataTestPipeline, TestFillFailFillValueNotScalar) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFillFailFillValueNotScalar.";
  // Removed stray copy-paste comment about BasicTokenizer lower_case; this
  // test checks that Fill rejects a non-scalar fill_value.

  // Create a TextFile dataset.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/basic_tokenizer.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Create Skip operation on ds.
  ds = ds->Skip(6);
  EXPECT_NE(ds, nullptr);

  // Create BasicTokenizer operation on ds.
  std::shared_ptr<TensorTransform> basic_tokenizer = std::make_shared<text::BasicTokenizer>(true);
  EXPECT_NE(basic_tokenizer, nullptr);

  // Create Map operation on ds.
  ds = ds->Map({basic_tokenizer}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create Fill op - with wrongful vector shape instead of scalar.
  std::vector<std::string> fill_string = {"ERROR"};
  std::shared_ptr<Tensor> fill_value_tensor;
  ASSERT_OK(Tensor::CreateFromVector(fill_string, &fill_value_tensor));
  mindspore::MSTensor fill_value_MSTensor =
    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(fill_value_tensor));
  // Renamed local from `mask` (copy-paste from the Mask tests) to `fill`.
  transforms::Fill fill = transforms::Fill(fill_value_MSTensor);
  ds = ds->Map({fill}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();

  // Expect failure: invalid Fill parameter (the shape of fill_value is not a scalar)
  EXPECT_EQ(iter, nullptr);
}

TEST_F(MindDataTestPipeline, TestMaskSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess.";

@@ -866,4 +1294,4 @@ TEST_F(MindDataTestPipeline, TestTypeCastFail) {
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid TypeCast input
EXPECT_EQ(iter, nullptr);
}
}

+ 14
- 0
tests/ut/python/dataset/test_fill_op.py View File

@@ -73,6 +73,19 @@ def test_fillop_string():
np.testing.assert_array_equal(data_row[0], expected)


def test_fillop_bytes():
    """
    Test Fill op with a bytes fill value: every element of the input
    byte-string array is replaced by b'abc', keeping shape and dtype kind.
    """
    def gen():
        # One row: a 1-D array of 3 byte strings (numpy 'S' dtype).
        yield (np.array(["A", "B", "C"], dtype='S'),)

    data = ds.GeneratorDataset(gen, column_names=["col"])
    fill_op = data_trans.Fill(b'abc')

    data = data.map(operations=fill_op, input_columns=["col"])
    expected = np.array([b'abc', b'abc', b'abc'], dtype='S')
    for data_row in data.create_tuple_iterator(output_numpy=True):
        np.testing.assert_array_equal(data_row[0], expected)


def test_fillop_error_handling():
def gen():
yield (np.array([4, 4, 4, 4]),)
@@ -92,4 +105,5 @@ if __name__ == "__main__":
test_fillop_up_type_cast()
test_fillop_down_type_cast()
test_fillop_string()
test_fillop_bytes()
test_fillop_error_handling()

+ 109
- 85
tests/ut/python/dataset/test_serdes_dataset.py View File

@@ -145,90 +145,6 @@ def test_serdes_mnist_dataset(remove_json_files=True):
delete_json_files()


def test_serdes_zip_dataset(remove_json_files=True):
    """
    Test serdes on zip dataset pipeline: serialize -> deserialize -> serialize
    must round-trip to an identical JSON file, and the deserialized pipeline
    must produce the same data as the original.
    """
    files = ["../data/dataset/testTFTestAllTypes/test.data"]
    schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
    ds.config.set_seed(1)

    ds0 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL)
    data1 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL)
    data2 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(10000)
    # Rename data2's columns so the zip below has no duplicate column names.
    data2 = data2.rename(input_columns=["col_sint16", "col_sint32", "col_sint64", "col_float",
                                        "col_1d", "col_2d", "col_3d", "col_binary"],
                         output_columns=["column_sint16", "column_sint32", "column_sint64", "column_float",
                                         "column_1d", "column_2d", "column_3d", "column_binary"])
    data3 = ds.zip((data1, data2))
    ds.serialize(data3, "zip_dataset_pipeline.json")
    assert validate_jsonfile("zip_dataset_pipeline.json") is True
    assert validate_jsonfile("zip_dataset_pipeline_typo.json") is False

    # Deserialize and re-serialize; both JSON files must be byte-identical.
    data4 = ds.deserialize(json_filepath="zip_dataset_pipeline.json")
    ds.serialize(data4, "zip_dataset_pipeline_1.json")
    assert validate_jsonfile("zip_dataset_pipeline_1.json") is True
    assert filecmp.cmp('zip_dataset_pipeline.json', 'zip_dataset_pipeline_1.json')

    rows = 0
    # Zipped rows carry data1's columns first, then data2's renamed columns,
    # so each original column appears at offset and offset + num_cols.
    for d0, d3, d4 in zip(ds0.create_tuple_iterator(output_numpy=True), data3.create_tuple_iterator(output_numpy=True),
                          data4.create_tuple_iterator(output_numpy=True)):
        num_cols = len(d0)
        offset = 0
        for t1 in d0:
            np.testing.assert_array_equal(t1, d3[offset])
            np.testing.assert_array_equal(t1, d3[offset + num_cols])
            np.testing.assert_array_equal(t1, d4[offset])
            np.testing.assert_array_equal(t1, d4[offset + num_cols])
            offset += 1
        rows += 1
    assert rows == 12

    if remove_json_files:
        delete_json_files()


def test_serdes_random_crop():
    """
    Test serdes on RandomCrop pipeline: serialize to a dict, rebuild the
    pipeline from it, and check both pipelines produce identical images.
    """
    logger.info("test_random_crop")
    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
    # Fix seed/workers so the random crop is reproducible across both runs.
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    decode_op = vision.Decode()
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data1 = data1.map(operations=decode_op, input_columns="image")
    data1 = data1.map(operations=random_crop_op, input_columns="image")

    # Serializing into python dictionary
    ds1_dict = ds.serialize(data1)
    # Serializing into json object (smoke check that the dict is JSON-able)
    _ = json.dumps(ds1_dict, indent=2)

    # Reconstruct dataset pipeline from its serialized form
    data1_1 = ds.deserialize(input_dict=ds1_dict)

    # Second dataset: decode only, used to keep iteration in lockstep.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    data2 = data2.map(operations=decode_op, input_columns="image")

    for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                     data1_1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                     data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(item1['image'], item1_1['image'])
        _ = item2["image"]

    # Restore configuration seed and num_parallel_workers
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)


def test_serdes_cifar10_dataset(remove_json_files=True):
"""
Test serdes on Cifar10 dataset pipeline
@@ -351,6 +267,90 @@ def test_serdes_voc_dataset(remove_json_files=True):
ds.config.set_num_parallel_workers(original_num_parallel_workers)


def test_serdes_zip_dataset(remove_json_files=True):
    """
    Test serdes on zip dataset pipeline.

    Builds a zip of two TFRecord pipelines (the second with shuffle + rename),
    serializes it to JSON, deserializes it back, and verifies both the
    re-serialized JSON and the per-row tensor contents match the original.

    Args:
        remove_json_files (bool): delete the generated json files on success.
    """
    files = ["../data/dataset/testTFTestAllTypes/test.data"]
    schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
    # Save the original seed so it can be restored at the end; the previous
    # version set the seed unconditionally and leaked it into later tests.
    original_seed = config_get_set_seed(1)

    ds0 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL)
    data1 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.GLOBAL)
    data2 = ds.TFRecordDataset(files, schema=schema_file, shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(10000)
    data2 = data2.rename(input_columns=["col_sint16", "col_sint32", "col_sint64", "col_float",
                                        "col_1d", "col_2d", "col_3d", "col_binary"],
                         output_columns=["column_sint16", "column_sint32", "column_sint64", "column_float",
                                         "column_1d", "column_2d", "column_3d", "column_binary"])
    data3 = ds.zip((data1, data2))
    ds.serialize(data3, "zip_dataset_pipeline.json")
    assert validate_jsonfile("zip_dataset_pipeline.json") is True
    # A file that was never written must not validate.
    assert validate_jsonfile("zip_dataset_pipeline_typo.json") is False

    # Deserialize and re-serialize; the two json files must be byte-identical.
    data4 = ds.deserialize(json_filepath="zip_dataset_pipeline.json")
    ds.serialize(data4, "zip_dataset_pipeline_1.json")
    assert validate_jsonfile("zip_dataset_pipeline_1.json") is True
    assert filecmp.cmp('zip_dataset_pipeline.json', 'zip_dataset_pipeline_1.json')

    # Each zipped row holds both branches' columns; with the same seed the
    # reference pipeline (ds0), the zip (data3) and its round-trip (data4)
    # must all agree, column by column.
    rows = 0
    for d0, d3, d4 in zip(ds0.create_tuple_iterator(output_numpy=True), data3.create_tuple_iterator(output_numpy=True),
                          data4.create_tuple_iterator(output_numpy=True)):
        num_cols = len(d0)
        offset = 0
        for t1 in d0:
            np.testing.assert_array_equal(t1, d3[offset])
            np.testing.assert_array_equal(t1, d3[offset + num_cols])
            np.testing.assert_array_equal(t1, d4[offset])
            np.testing.assert_array_equal(t1, d4[offset + num_cols])
            offset += 1
        rows += 1
    assert rows == 12

    if remove_json_files:
        delete_json_files()

    # Restore configuration seed
    ds.config.set_seed(original_seed)


def test_serdes_random_crop():
    """
    Test serdes on RandomCrop pipeline: serialize, deserialize, and compare
    the restored pipeline's output against the original row by row.
    """
    logger.info("test_random_crop")
    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: decode then crop with padding on all four sides.
    decode_op = vision.Decode()
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data1 = (ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
             .map(operations=decode_op, input_columns="image")
             .map(operations=random_crop_op, input_columns="image"))

    # Serialize to a dictionary, make sure it survives a json dump, and
    # rebuild the pipeline from the dictionary form.
    ds1_dict = ds.serialize(data1)
    _ = json.dumps(ds1_dict, indent=2)
    data1_1 = ds.deserialize(input_dict=ds1_dict)

    # Second dataset: decode only (iterated just for coverage).
    data2 = (ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
             .map(operations=decode_op, input_columns="image"))

    iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    iter1_1 = data1_1.create_dict_iterator(num_epochs=1, output_numpy=True)
    iter2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item1_1, item2 in zip(iter1, iter1_1, iter2):
        np.testing.assert_array_equal(item1['image'], item1_1['image'])
        _ = item2["image"]

    # Restore configuration num_parallel_workers
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)


def test_serdes_to_device(remove_json_files=True):
"""
Test serdes on transfer dataset pipeline.
@@ -405,6 +405,25 @@ def test_serdes_uniform_augment(remove_json_files=True):
util_check_serialize_deserialize_file(data, "uniform_augment_pipeline", remove_json_files)


def skip_test_serdes_fill(remove_json_files=True):
    """
    Test serdes on Fill data transform.

    Args:
        remove_json_files (bool): delete the generated json files on success.
    """
    def gen():
        yield (np.array([4, 5, 6, 7], dtype=np.int32),)

    pipeline = ds.GeneratorDataset(gen, column_names=["col"])
    pipeline = pipeline.map(operations=c.Fill(3), input_columns=["col"])

    # Fill(3) replaces every element of the column with the fill value.
    expected = np.array([3, 3, 3, 3], dtype=np.int32)
    for data_row in pipeline:
        np.testing.assert_array_equal(data_row[0].asnumpy(), expected)

    # FIXME - need proper serdes support for Fill's fill_value parameter
    util_check_serialize_deserialize_file(pipeline, "fill_pipeline", remove_json_files)


def test_serdes_exception():
"""
Test exception case in serdes
@@ -465,7 +484,7 @@ def delete_json_files():


# Test save load minddataset
def skip_test_minddataset(add_and_remove_cv_file):
def skip_test_minddataset(add_and_remove_cv_file=True):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
@@ -504,4 +523,9 @@ if __name__ == '__main__':
test_serdes_voc_dataset()
test_serdes_zip_dataset()
test_serdes_random_crop()
test_serdes_to_device()
test_serdes_pyvision()
test_serdes_uniform_augment()
skip_test_serdes_fill()
test_serdes_exception()
skip_test_minddataset()

Loading…
Cancel
Save