From 4f98ecb433f779e6683781f65522be5c4a9e41e2 Mon Sep 17 00:00:00 2001 From: luoyang Date: Fri, 28 Aug 2020 10:51:41 +0800 Subject: [PATCH] Fix bugs in c-api: rename, concat, take, sampler, duplicate column & Change docstring of OneHot --- .../ccsrc/minddata/dataset/api/datasets.cc | 164 +++++++++++------- .../ccsrc/minddata/dataset/include/datasets.h | 66 +++---- mindspore/dataset/transforms/c_transforms.py | 2 +- .../cpp/dataset/c_api_dataset_cifar_test.cc | 30 ++++ .../ut/cpp/dataset/c_api_dataset_coco_test.cc | 11 ++ .../ut/cpp/dataset/c_api_dataset_csv_test.cc | 11 ++ .../dataset/c_api_dataset_manifest_test.cc | 13 +- .../ut/cpp/dataset/c_api_dataset_ops_test.cc | 74 +++++++- .../dataset/c_api_dataset_randomdata_test.cc | 26 ++- .../ut/cpp/dataset/c_api_dataset_voc_test.cc | 10 ++ tests/ut/cpp/dataset/c_api_datasets_test.cc | 57 +++++- 11 files changed, 350 insertions(+), 114 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index 8ace68d2a3..c231146168 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -191,8 +191,8 @@ std::shared_ptr ImageFolder(const std::string &dataset_dir, } // Function to create a ManifestDataset. 
-std::shared_ptr Manifest(std::string dataset_file, std::string usage, - std::shared_ptr sampler, +std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, + const std::shared_ptr &sampler, const std::map &class_indexing, bool decode) { auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode); @@ -211,7 +211,7 @@ std::shared_ptr Mnist(const std::string &dataset_dir, const std::s // Function to overload "+" operator to concat two datasets std::shared_ptr operator+(const std::shared_ptr &datasets1, const std::shared_ptr &datasets2) { - std::shared_ptr ds = std::make_shared(std::vector({datasets1, datasets2})); + std::shared_ptr ds = std::make_shared(std::vector({datasets2, datasets1})); // Call derived class validation method. return ds->ValidateParams() ? ds : nullptr; @@ -580,13 +580,6 @@ bool SchemaObj::from_json(nlohmann::json json_obj) { // OTHER FUNCTIONS -// Helper function to create default RandomSampler. -std::shared_ptr CreateDefaultSampler() { - const int32_t num_samples = 0; // 0 means to sample all ids. 
- bool replacement = false; - return std::make_shared(replacement, num_samples); -} - // Helper function to compute a default shuffle size Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, int64_t *shuffle_size) { @@ -682,6 +675,36 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha return true; } +// Helper function to validate dataset sampler parameter +bool ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr &sampler) { + if (sampler == nullptr) { + MS_LOG(ERROR) << dataset_name << ": Sampler is not constructed correctly, sampler: nullptr"; + return false; + } + return true; +} + +// Helper function to validate dataset input/output column parameter +bool ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param, + const std::vector &columns) { + if (columns.empty()) { + MS_LOG(ERROR) << dataset_name << ":" << column_param << " should not be empty"; + return false; + } + for (uint32_t i = 0; i < columns.size(); ++i) { + if (columns[i].empty()) { + MS_LOG(ERROR) << dataset_name << ":" << column_param << "[" << i << "] should not be empty"; + return false; + } + } + std::set columns_set(columns.begin(), columns.end()); + if (columns_set.size() != columns.size()) { + MS_LOG(ERROR) << dataset_name << ":" << column_param << ": Every column name should not be same with others"; + return false; + } + return true; +} + /* ####################################### Derived Dataset classes ################################# */ // DERIVED DATASET CLASSES LEAF-NODE DATASETS @@ -701,6 +724,9 @@ bool CelebADataset::ValidateParams() { if (!ValidateDatasetDirParam("CelebADataset", dataset_dir_)) { return false; } + if (!ValidateDatasetSampler("CelebADataset", sampler_)) { + return false; + } std::set dataset_type_list = {"all", "train", "valid", "test"}; auto iter = dataset_type_list.find(dataset_type_); if (iter == 
dataset_type_list.end()) { @@ -715,11 +741,6 @@ std::vector> CelebADataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler based on the shuffle variable. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - std::unique_ptr schema = std::make_unique(); RETURN_EMPTY_IF_ERROR( schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); @@ -736,18 +757,15 @@ std::vector> CelebADataset::Build() { Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr sampler) : dataset_dir_(dataset_dir), sampler_(sampler) {} -bool Cifar10Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_); } +bool Cifar10Dataset::ValidateParams() { + return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_) && ValidateDatasetSampler("Cifar10Dataset", sampler_); +} // Function to build CifarOp for Cifar10 std::vector> Cifar10Dataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler based on the shuffle variable. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - // Do internal Schema generation. 
auto schema = std::make_unique(); RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); @@ -765,18 +783,16 @@ std::vector> Cifar10Dataset::Build() { Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, std::shared_ptr sampler) : dataset_dir_(dataset_dir), sampler_(sampler) {} -bool Cifar100Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_); } +bool Cifar100Dataset::ValidateParams() { + return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_) && + ValidateDatasetSampler("Cifar100Dataset", sampler_); +} // Function to build CifarOp for Cifar100 std::vector> Cifar100Dataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler based on the shuffle variable. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - // Do internal Schema generation. auto schema = std::make_unique(); RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); @@ -987,6 +1003,9 @@ bool CocoDataset::ValidateParams() { if (!ValidateDatasetDirParam("CocoDataset", dataset_dir_)) { return false; } + if (!ValidateDatasetSampler("CocoDataset", sampler_)) { + return false; + } Path annotation_file(annotation_file_); if (!annotation_file.Exists()) { MS_LOG(ERROR) << "annotation_file is invalid or not exist"; @@ -1006,11 +1025,6 @@ std::vector> CocoDataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler based on the shuffle variable. 
- if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - CocoOp::TaskType task_type; if (task_ == "Detection") { task_type = CocoOp::TaskType::Detection; @@ -1100,6 +1114,12 @@ bool CSVDataset::ValidateParams() { return false; } + if (!column_names_.empty()) { + if (!ValidateDatasetColumnParam("CSVDataset", "column_names", column_names_)) { + return false; + } + } + return true; } @@ -1155,17 +1175,15 @@ ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std class_indexing_(class_indexing), exts_(extensions) {} -bool ImageFolderDataset::ValidateParams() { return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_); } +bool ImageFolderDataset::ValidateParams() { + return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_) && + ValidateDatasetSampler("ImageFolderDataset", sampler_); +} std::vector> ImageFolderDataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - // Do internal Schema generation. // This arg is exist in ImageFolderOp, but not externalized (in Python API). 
std::unique_ptr schema = std::make_unique(); @@ -1180,7 +1198,8 @@ std::vector> ImageFolderDataset::Build() { return node_ops; } -ManifestDataset::ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr sampler, +ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage, + const std::shared_ptr &sampler, const std::map &class_indexing, bool decode) : dataset_file_(dataset_file), usage_(usage), decode_(decode), class_index_(class_indexing), sampler_(sampler) {} @@ -1190,6 +1209,9 @@ bool ManifestDataset::ValidateParams() { MS_LOG(ERROR) << "dataset file: [" << dataset_file_ << "] is invalid or not exist"; return false; } + if (!ValidateDatasetSampler("ManifestDataset", sampler_)) { + return false; + } std::vector usage_list = {"train", "eval", "inference"}; if (find(usage_list.begin(), usage_list.end(), usage_) == usage_list.end()) { @@ -1204,11 +1226,6 @@ std::vector> ManifestDataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler based on the shuffle variable. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - // Do internal Schema generation. 
auto schema = std::make_unique(); RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); @@ -1228,17 +1245,14 @@ std::vector> ManifestDataset::Build() { MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr sampler) : dataset_dir_(dataset_dir), sampler_(sampler) {} -bool MnistDataset::ValidateParams() { return ValidateDatasetDirParam("MnistDataset", dataset_dir_); } +bool MnistDataset::ValidateParams() { + return ValidateDatasetDirParam("MnistDataset", dataset_dir_) && ValidateDatasetSampler("MnistDataset", sampler_); +} std::vector> MnistDataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - // Do internal Schema generation. auto schema = std::make_unique(); RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); @@ -1257,6 +1271,14 @@ bool RandomDataset::ValidateParams() { MS_LOG(ERROR) << "RandomDataset: total_rows must be greater than 0, now get " << total_rows_; return false; } + if (!ValidateDatasetSampler("RandomDataset", sampler_)) { + return false; + } + if (!columns_list_.empty()) { + if (!ValidateDatasetColumnParam("RandomDataset", "columns_list", columns_list_)) { + return false; + } + } return true; } @@ -1279,11 +1301,6 @@ std::vector> RandomDataset::Build() { total_rows_ = schema_obj->get_num_rows(); } - // If user does not specify Sampler, create a default sampler based on the shuffle variable. 
- if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - std::string schema_json_string, schema_file_path; if (schema_ != nullptr) { schema_->set_dataset_type("Random"); @@ -1392,6 +1409,9 @@ bool VOCDataset::ValidateParams() { MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified."; return false; } + if (!ValidateDatasetSampler("VOCDataset", sampler_)) { + return false; + } if (task_ == "Segmentation") { if (!class_index_.empty()) { MS_LOG(ERROR) << "class_indexing is invalid in Segmentation task."; @@ -1420,11 +1440,6 @@ std::vector> VOCDataset::Build() { // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; - // If user does not specify Sampler, create a default sampler based on the shuffle variable. - if (sampler_ == nullptr) { - sampler_ = CreateDefaultSampler(); - } - auto schema = std::make_unique(); VOCOp::TaskType task_type_; @@ -1539,6 +1554,10 @@ bool ConcatDataset::ValidateParams() { MS_LOG(ERROR) << "Concat: concatenated datasets are not specified."; return false; } + if (find(datasets_.begin(), datasets_.end(), nullptr) != datasets_.end()) { + MS_LOG(ERROR) << "Concat: concatenated dataset should not be null."; + return false; + } return true; } @@ -1586,6 +1605,21 @@ bool MapDataset::ValidateParams() { MS_LOG(ERROR) << "Map: No operation is specified."; return false; } + if (!input_columns_.empty()) { + if (!ValidateDatasetColumnParam("MapDataset", "input_columns", input_columns_)) { + return false; + } + } + if (!output_columns_.empty()) { + if (!ValidateDatasetColumnParam("MapDataset", "output_columns", output_columns_)) { + return false; + } + } + if (!project_columns_.empty()) { + if (!ValidateDatasetColumnParam("MapDataset", "project_columns", project_columns_)) { + return false; + } + } return true; } @@ -1615,12 +1649,12 @@ RenameDataset::RenameDataset(const std::vector &input_columns, : input_columns_(input_columns), output_columns_(output_columns) {} bool 
RenameDataset::ValidateParams() { - if (input_columns_.empty() || output_columns_.empty()) { - MS_LOG(ERROR) << "input and output columns must be specified"; + if (input_columns_.size() != output_columns_.size()) { + MS_LOG(ERROR) << "RenameDataset: input and output columns must be the same size"; return false; } - if (input_columns_.size() != output_columns_.size()) { - MS_LOG(ERROR) << "input and output columns must be the same size"; + if (!ValidateDatasetColumnParam("RenameDataset", "input_columns", input_columns_) || + !ValidateDatasetColumnParam("RenameDataset", "output_columns", output_columns_)) { return false; } return true; @@ -1713,7 +1747,7 @@ std::vector> TakeDataset::Build() { // Function to validate the parameters for TakeDataset bool TakeDataset::ValidateParams() { - if (take_count_ < 0 && take_count_ != -1) { + if (take_count_ <= 0 && take_count_ != -1) { MS_LOG(ERROR) << "Take: take_count should be either -1 or positive integer, take_count: " << take_count_; return false; } diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index ea3f65a5ed..a367e27d8d 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -84,32 +84,32 @@ std::shared_ptr Schema(const std::string &schema_file = ""); // The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. /// \param[in] dataset_dir Path to the root directory that contains the dataset. /// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'. -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` -/// will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] decode Decode the images after reading (default=false). /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). /// \return Shared pointer to the current Dataset std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all", - const std::shared_ptr &sampler = nullptr, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), bool decode = false, const std::set &extensions = {}); /// \brief Function to create a Cifar10 Dataset /// \notes The generated dataset has two columns ['image', 'label'] /// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` -/// will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \return Shared pointer to the current Dataset std::shared_ptr Cifar10(const std::string &dataset_dir, - const std::shared_ptr &sampler = nullptr); + const std::shared_ptr &sampler = RandomSampler()); /// \brief Function to create a Cifar100 Dataset /// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label'] /// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` -/// will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \return Shared pointer to the current Dataset std::shared_ptr Cifar100(const std::string &dataset_dir, - const std::shared_ptr &sampler = nullptr); + const std::shared_ptr &sampler = RandomSampler()); /// \brief Function to create a CLUEDataset /// \notes The generated dataset has a variable number of columns depending on the task and usage @@ -146,12 +146,12 @@ std::shared_ptr CLUE(const std::vector &dataset_files, /// \param[in] annotation_file Path to the annotation json /// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' /// \param[in] decode Decode the images after reading -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` -/// will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \return Shared pointer to the current Dataset std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task = "Detection", const bool &decode = false, - const std::shared_ptr &sampler = nullptr); + const std::shared_ptr &sampler = RandomSampler()); /// \brief Function to create a CSVDataset /// \notes The generated dataset has a variable number of columns @@ -185,13 +185,13 @@ std::shared_ptr CSV(const std::vector &dataset_files, c /// The generated dataset has two columns ['image', 'label'] /// \param[in] dataset_dir Path to the root directory that contains the dataset /// \param[in] decode A flag to decode in ImageFolder -/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is `nullptr`, -/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] extensions File extensions to be read /// \param[in] class_indexing a class name to label map /// \return Shared pointer to the current ImageFolderDataset std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode = false, - const std::shared_ptr &sampler = nullptr, + const std::shared_ptr &sampler = RandomSampler(), const std::set &extensions = {}, const std::map &class_indexing = {}); @@ -199,25 +199,25 @@ std::shared_ptr ImageFolder(const std::string &dataset_dir, /// \notes The generated dataset has two columns ['image', 'label'] /// \param[in] dataset_file The dataset file to be read /// \param[in] usage Need "train", "eval" or "inference" data (default="train") -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, -/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder /// names will be sorted alphabetically and each class will be given a unique index starting from 0). /// \param[in] decode Decode the images after reading (default=false). 
/// \return Shared pointer to the current ManifestDataset -std::shared_ptr Manifest(std::string dataset_file, std::string usage = "train", - std::shared_ptr sampler = nullptr, +std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage = "train", + const std::shared_ptr &sampler = RandomSampler(), const std::map &class_indexing = {}, bool decode = false); /// \brief Function to create a MnistDataset /// \notes The generated dataset has two columns ['image', 'label'] /// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, -/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \return Shared pointer to the current MnistDataset std::shared_ptr Mnist(const std::string &dataset_dir, - const std::shared_ptr &sampler = nullptr); + const std::shared_ptr &sampler = RandomSampler()); /// \brief Function to create a ConcatDataset /// \notes Reload "+" operator to concat two datasets @@ -231,14 +231,14 @@ std::shared_ptr operator+(const std::shared_ptr &dataset /// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random) /// \param[in] schema SchemaObj to set column type, data type and data shape /// \param[in] columns_list List of columns to be read (default={}, read all columns) -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` -/// will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \return Shared pointer to the current Dataset template > std::shared_ptr RandomData(const int32_t &total_rows = 0, T schema = nullptr, const std::vector &columns_list = {}, - std::shared_ptr sampler = nullptr) { - auto ds = std::make_shared(total_rows, schema, std::move(columns_list), std::move(sampler)); + const std::shared_ptr &sampler = RandomSampler()) { + auto ds = std::make_shared(total_rows, schema, columns_list, std::move(sampler)); return ds->ValidateParams() ? ds : nullptr; } @@ -271,13 +271,13 @@ std::shared_ptr TextFile(const std::vector &datase /// \param[in] mode Set the data list txt file to be readed /// \param[in] class_indexing A str-to-int mapping from label name to index /// \param[in] decode Decode the images after reading -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` -/// will be used to randomly iterate the entire dataset +/// \param[in] sampler Object used to choose samples from the dataset. 
If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \return Shared pointer to the current Dataset std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", const std::string &mode = "train", const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &sampler = nullptr); + const std::shared_ptr &sampler = RandomSampler()); /// \brief Function to create a ZipDataset /// \notes Applies zip to the dataset @@ -716,7 +716,7 @@ class ImageFolderDataset : public Dataset { class ManifestDataset : public Dataset { public: /// \brief Constructor - ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr sampler, + ManifestDataset(const std::string &dataset_file, const std::string &usage, const std::shared_ptr &sampler, const std::map &class_indexing, bool decode); /// \brief Destructor @@ -768,7 +768,7 @@ class RandomDataset : public Dataset { /// \brief Constructor RandomDataset(const int32_t &total_rows, std::shared_ptr schema, - const std::vector &columns_list, std::shared_ptr sampler) + const std::vector &columns_list, const std::shared_ptr &sampler) : total_rows_(total_rows), schema_path_(""), schema_(std::move(schema)), @@ -776,8 +776,8 @@ class RandomDataset : public Dataset { sampler_(std::move(sampler)) {} /// \brief Constructor - RandomDataset(const int32_t &total_rows, std::string schema_path, std::vector columns_list, - std::shared_ptr sampler) + RandomDataset(const int32_t &total_rows, std::string schema_path, const std::vector &columns_list, + const std::shared_ptr &sampler) : total_rows_(total_rows), schema_path_(schema_path), columns_list_(columns_list), sampler_(std::move(sampler)) {} /// \brief Destructor diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index f4b7cde1fa..f3b20e66a6 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ 
b/mindspore/dataset/transforms/c_transforms.py @@ -32,7 +32,7 @@ class OneHot(cde.OneHotOp): Args: num_classes (int): Number of classes of the label - it should be bigger than or equal to label class number. + it should be bigger than largest label number in dataset. Raises: RuntimeError: feature size is bigger than num_classes. diff --git a/tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc b/tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc index 2e4125516d..1c473f6c81 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc @@ -107,3 +107,33 @@ TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) { std::shared_ptr ds = Cifar10("", RandomSampler(false, 10)); EXPECT_EQ(ds, nullptr); } + +TEST_F(MindDataTestPipeline, TestCifar10DatasetWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar10DatasetWithNullSampler."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestCifar100DatasetWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100DatasetWithNullSampler."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; + std::shared_ptr ds = Cifar100(folder_path, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestCifar100DatasetWithWrongSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100DatasetWithWrongSampler."; + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; + std::shared_ptr ds = Cifar100(folder_path, RandomSampler(false, -10)); + // Expect failure: sampler is not construnced correctly + EXPECT_EQ(ds, nullptr); +} diff --git 
a/tests/ut/cpp/dataset/c_api_dataset_coco_test.cc b/tests/ut/cpp/dataset/c_api_dataset_coco_test.cc index dcc21e253b..ec1c784b95 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_coco_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_coco_test.cc @@ -290,3 +290,14 @@ TEST_F(MindDataTestPipeline, TestCocoStuff) { // Manually terminate the pipeline iter->Stop(); } + +TEST_F(MindDataTestPipeline, TestCocoWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoWithNullSampler."; + // Create a Coco Dataset + std::string folder_path = datasets_root_path_ + "/testCOCO/train"; + std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json"; + + std::shared_ptr ds = Coco(folder_path, annotation_file, "Detection", false, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_csv_test.cc b/tests/ut/cpp/dataset/c_api_dataset_csv_test.cc index f005cf301f..c900954d69 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_csv_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_csv_test.cc @@ -473,3 +473,14 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetShuffleGlobal) { GlobalContext::config_manager()->set_seed(original_seed); GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers); } + +TEST_F(MindDataTestPipeline, TestCSVDatasetDuplicateColumnName) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCSVDatasetDuplicateColumnName."; + + // Create a CSVDataset, with single CSV file + std::string train_file = datasets_root_path_ + "/testCSV/1.csv"; + std::vector column_names = {"col1", "col1", "col3", "col4"}; + std::shared_ptr ds = CSV({train_file}, ',', {}, column_names, -1, ShuffleMode::kFalse); + // Expect failure: duplicate column names + EXPECT_EQ(ds, nullptr); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc b/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc index 5e4c91c765..5911279d02 100644 --- 
a/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc @@ -59,7 +59,7 @@ TEST_F(MindDataTestPipeline, TestManifestDecode) { std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json"; // Create a Manifest Dataset - std::shared_ptr ds = Manifest(file_path, "train", nullptr, {}, true); + std::shared_ptr ds = Manifest(file_path, "train", RandomSampler(), {}, true); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -130,7 +130,7 @@ TEST_F(MindDataTestPipeline, TestManifestClassIndex) { std::vector expected_label = {111, 222}; // Create a Manifest Dataset - std::shared_ptr ds = Manifest(file_path, "train", nullptr, map, true); + std::shared_ptr ds = Manifest(file_path, "train", RandomSampler(), map, true); EXPECT_NE(ds, nullptr); // Create an iterator over the result of the above dataset @@ -204,3 +204,12 @@ TEST_F(MindDataTestPipeline, TestManifestError) { std::shared_ptr ds1 = Manifest(file_path, "invalid_usage"); EXPECT_EQ(ds1, nullptr); } + +TEST_F(MindDataTestPipeline, TestManifestWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestWithNullSampler."; + std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json"; + // Create a Manifest Dataset + std::shared_ptr ds = Manifest(file_path, "train", nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc index e65cf8392d..818189e6e2 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc @@ -311,6 +311,34 @@ TEST_F(MindDataTestPipeline, TestProjectMap) { iter->Stop(); } +TEST_F(MindDataTestPipeline, TestMapDuplicateColumn) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapDuplicateColumn."; + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + 
"/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_NE(random_vertical_flip_op, nullptr); + + // Create a Map operation on ds + auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {}, {}); + // Expect failure: duplicate input column name + EXPECT_EQ(ds1, nullptr); + + // Create a Map operation on ds + auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"}, {}); + // Expect failure: duplicate output column name + EXPECT_EQ(ds2, nullptr); + + // Create a Map operation on ds + auto ds3 = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "image"}); + // Expect failure: duplicate project column name + EXPECT_EQ(ds3, nullptr); +} + TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) { MS_LOG(INFO) << "Doing MindDataTestPipeline.TestProjectMapAutoInjection"; @@ -362,8 +390,8 @@ TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) { iter->Stop(); } -TEST_F(MindDataTestPipeline, TestRenameFail) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail."; +TEST_F(MindDataTestPipeline, TestRenameFail1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail1."; // We expect this test to fail because input and output in Rename are not the same size // Create an ImageFolder Dataset @@ -381,6 +409,38 @@ TEST_F(MindDataTestPipeline, TestRenameFail) { EXPECT_EQ(ds, nullptr); } +TEST_F(MindDataTestPipeline, TestRenameFail2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail2."; + // We expect this test to fail because input or output column name is empty + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Rename operation on ds + ds = 
ds->Rename({"image", "label"}, {"col2", ""}); + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestRenameFail3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail3."; + // We expect this test to fail because duplicate column name + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds, nullptr); + + // Create a Rename operation on ds + auto ds1 = ds->Rename({"image", "image"}, {"col1", "col2"}); + EXPECT_EQ(ds1, nullptr); + + // Create a Rename operation on ds + auto ds2 = ds->Rename({"image", "label"}, {"col1", "col1"}); + EXPECT_EQ(ds2, nullptr); +} + TEST_F(MindDataTestPipeline, TestRenameSuccess) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameSuccess."; @@ -688,9 +748,15 @@ TEST_F(MindDataTestPipeline, TestTakeDatasetError1) { // Create a Take operation on ds with invalid count input int32_t count = -5; - ds = ds->Take(count); + auto ds1 = ds->Take(count); // Expect nullptr for invalid input take_count - EXPECT_EQ(ds, nullptr); + EXPECT_EQ(ds1, nullptr); + + // Create a Take operation on ds with invalid count input + count = 0; + auto ds2 = ds->Take(count); + // Expect nullptr for invalid input take_count + EXPECT_EQ(ds2, nullptr); } TEST_F(MindDataTestPipeline, TestTakeDatasetNormal) { diff --git a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc index 22e77a2ddc..0506a58134 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc @@ -265,4 +265,28 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic4) { // Manually terminate the pipeline iter->Stop(); GlobalContext::config_manager()->set_seed(curr_seed); -} \ No newline at end of file +} + +TEST_F(MindDataTestPipeline, TestRandomDatasetWithNullSampler) { + MS_LOG(INFO) << "Doing 
MindDataTestPipeline-TestRandomDatasetWithNullSampler."; + + // Create a RandomDataset + std::shared_ptr schema = Schema(); + schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}); + schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); + std::shared_ptr ds = RandomData(50, schema, {}, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetDuplicateColumnName."; + + // Create a RandomDataset + std::shared_ptr schema = Schema(); + schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}); + schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); + std::shared_ptr ds = RandomData(50, schema, {"image", "image"}); + // Expect failure: duplicate column names + EXPECT_EQ(ds, nullptr); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_voc_test.cc b/tests/ut/cpp/dataset/c_api_dataset_voc_test.cc index 17fa23198a..ab81d3667d 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_voc_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_voc_test.cc @@ -194,3 +194,13 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) { // Expect nullptr for segmentation task with class_index EXPECT_EQ(ds, nullptr); } + +TEST_F(MindDataTestPipeline, TestVOCWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCWithNullSampler."; + + // Create a VOC Dataset + std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; + std::shared_ptr ds = VOC(folder_path, "Segmentation", "train", {}, false, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} diff --git a/tests/ut/cpp/dataset/c_api_datasets_test.cc b/tests/ut/cpp/dataset/c_api_datasets_test.cc index 1ae562a618..2952222d72 100644 --- a/tests/ut/cpp/dataset/c_api_datasets_test.cc +++ b/tests/ut/cpp/dataset/c_api_datasets_test.cc @@ -118,24 +118,44 @@ 
TEST_F(MindDataTestPipeline, TestCelebAException) { EXPECT_EQ(ds1, nullptr); } -TEST_F(MindDataTestPipeline, TestImageFolderFail1) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFail1."; +TEST_F(MindDataTestPipeline, TestCelebADatasetWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADatasetWithNullSampler."; - // Create an ImageFolder Dataset - std::shared_ptr ds = ImageFolder("", true, nullptr); + // Create a CelebA Dataset + std::string folder_path = datasets_root_path_ + "/testCelebAData/"; + std::shared_ptr ds = CelebA(folder_path, "all", nullptr, false, {}); + // Expect failure: sampler can not be nullptr EXPECT_EQ(ds, nullptr); } -TEST_F(MindDataTestPipeline, TestMnistFail1) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFail1."; +TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir."; // Create a Mnist Dataset std::shared_ptr ds = Mnist("", RandomSampler(false, 10)); EXPECT_EQ(ds, nullptr); } -TEST_F(MindDataTestPipeline, TestImageFolderFail2) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFail2."; +TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler."; + + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestImageFolderWithWrongDatasetDir) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithWrongDatasetDir."; + + // Create an ImageFolder Dataset + std::shared_ptr ds = ImageFolder("", true, nullptr); + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongExtension) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithWrongExtension."; // Create an ImageFolder Dataset 
std::string folder_path = datasets_root_path_ + "/testPK/data/"; @@ -150,8 +170,29 @@ TEST_F(MindDataTestPipeline, TestImageFolderFail2) { // Iterate the dataset and get each row std::unordered_map> row; iter->GetNextRow(&row); + // Expect no data: can not find files with specified extension EXPECT_EQ(row.size(), 0); // Manually terminate the pipeline iter->Stop(); } + +TEST_F(MindDataTestPipeline, TestImageFolderFailWithNullSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithNullSampler."; + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, nullptr); + // Expect failure: sampler can not be nullptr + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithWrongSampler."; + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, SequentialSampler(-2, 5)); + // Expect failure: sampler is not constructed correctly + EXPECT_EQ(ds, nullptr); +}