diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index 2e748cae1c..2ab554305b 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -86,6 +86,7 @@ #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/random_node.h" #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" // IR leaf nodes disabled for android @@ -140,26 +141,11 @@ bool Dataset::DeviceQueue(bool send_epoch_end) { return false; } - // Get a uuid for queue name - std::string queue_name = Services::GetUniqueID(); - - // TODO(CRC): - // Get device type from ms context - std::string device_type = "CPU"; - - // Get device ID from children - int32_t device_id = 0; - rc = TransferNode::get_distribution(shared_from_this(), &device_id); - if (rc.IsError()) { - MS_LOG(ERROR) << "Failed to get shard id. Error status: " << rc; - return false; - } - // Add TransferNode IR on top of dataset d - auto ds = std::make_shared(shared_from_this(), queue_name, device_id, device_type, send_epoch_end); + auto ds = std::make_shared(shared_from_this()->IRNode(), send_epoch_end); // Get ToDevice consumer - auto consumer = std::make_unique(device_type, send_epoch_end, -1); + auto consumer = std::make_unique(send_epoch_end, -1); ToDevice *consumer_ = consumer.get(); rc = consumer->Init(ds); if (rc.IsError()) { @@ -199,7 +185,7 @@ bool Dataset::Save(std::string dataset_path, int32_t num_files, std::string data return false; } SaveToDisk *consumer_ = consumer.get(); - rc = consumer->Init(ds); + rc = consumer->Init(ds->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "CreateSaver failed." 
<< rc; return false; @@ -225,19 +211,10 @@ bool Dataset::Save(std::string dataset_path, int32_t num_files, std::string data #endif // Constructor -Dataset::Dataset() { - // Fetch some default value from config manager - std::shared_ptr cfg = GlobalContext::config_manager(); - num_workers_ = cfg->num_parallel_workers(); - rows_per_buffer_ = cfg->rows_per_buffer(); - connector_que_size_ = cfg->op_connector_size(); - worker_connector_size_ = cfg->worker_connector_size(); - tree_getters_ = std::make_shared(); -} +Dataset::Dataset() { tree_getters_ = std::make_shared(); } int64_t Dataset::GetDatasetSize() { int64_t dataset_size; - auto ds = shared_from_this(); Status rc; std::unique_ptr runtime_context = std::make_unique(); rc = runtime_context->Init(); @@ -246,7 +223,7 @@ int64_t Dataset::GetDatasetSize() { return -1; } if (!tree_getters_->isInitialized()) { - rc = tree_getters_->Init(ds); + rc = tree_getters_->Init(this->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "GetDatasetSize: Initializing TreeGetters failed."; return -1; @@ -267,7 +244,7 @@ std::vector Dataset::GetOutputTypes() { return types; } if (!tree_getters_->isInitialized()) { - rc = tree_getters_->Init(shared_from_this()); + rc = tree_getters_->Init(this->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "GetOutputTypes: Initializing TreeGetters failed."; types.clear(); @@ -294,7 +271,7 @@ std::vector Dataset::GetOutputShapes() { return shapes; } if (!tree_getters_->isInitialized()) { - rc = tree_getters_->Init(shared_from_this()); + rc = tree_getters_->Init(this->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "GetOutputShapes: Initializing TreeGetters failed."; shapes.clear(); @@ -321,7 +298,7 @@ int64_t Dataset::GetNumClasses() { return -1; } if (!tree_getters_->isInitialized()) { - rc = tree_getters_->Init(ds); + rc = tree_getters_->Init(ds->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "GetNumClasses: Initializing TreeGetters failed."; return -1; @@ -331,9 +308,6 @@ int64_t Dataset::GetNumClasses() { return rc.IsError() ? -1 : num_classes; } -// Constructor to initialize the cache -Dataset::Dataset(const std::shared_ptr &dataset_cache) : Dataset() { cache_ = dataset_cache; } - /// \brief Function to create a SchemaObj /// \param[in] schema_file Path of schema file /// \return Shared pointer to the current schema @@ -346,161 +320,155 @@ std::shared_ptr Schema(const std::string &schema_file) { // FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS // (In alphabetical order) -// Function to create a AlbumNode. -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler, cache); +// Function to create a AlbumDataset. +std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names, bool decode, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler, cache); return ds; } -// Function to create a CelebANode. -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, bool decode, - const std::set &extensions, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, decode, extensions, cache); +// Function to create a CelebADataset. 
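The getters above (GetDatasetSize, GetOutputTypes, GetOutputShapes, GetNumClasses) all follow the same lazy pattern: build the TreeGetters consumer from the IR root on the first query, reuse it afterwards, and report failure with -1. Below is a minimal, self-contained sketch of that pattern; FakeGetters and FakeNode are hypothetical stand-ins, not the MindSpore classes.

#include <cstdint>
#include <iostream>
#include <memory>

struct FakeNode {};

class FakeGetters {
 public:
  bool isInitialized() const { return initialized_; }
  bool Init(const std::shared_ptr<FakeNode> &) {  // returns false on error in the sketch
    initialized_ = true;
    return true;
  }
  bool GetDatasetSize(int64_t *size) const {  // placeholder answer
    *size = 60000;
    return true;
  }

 private:
  bool initialized_ = false;
};

int64_t QueryDatasetSize(FakeGetters *getters, const std::shared_ptr<FakeNode> &ir_root) {
  // Build the execution tree only once, on the first getter call.
  if (!getters->isInitialized() && !getters->Init(ir_root)) {
    std::cerr << "GetDatasetSize: Initializing TreeGetters failed.\n";
    return -1;  // the real code also signals failure with -1
  }
  int64_t size = -1;
  return getters->GetDatasetSize(&size) ? size : -1;
}

int main() {
  FakeGetters getters;
  auto root = std::make_shared<FakeNode>();
  std::cout << QueryDatasetSize(&getters, root) << "\n";  // builds the tree, prints 60000
  std::cout << QueryDatasetSize(&getters, root) << "\n";  // reuses the existing tree
}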
+std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, bool decode, + const std::set &extensions, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, decode, extensions, cache); return ds; } -// Function to create a Cifar10Node. -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); +// Function to create a Cifar10Dataset. +std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, cache); return ds; } -// Function to create a Cifar100Node. -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); +// Function to create a Cifar100Dataset. +std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, cache); return ds; } -// Function to create a CLUENode. -std::shared_ptr CLUE(const std::vector &clue_files, const std::string &task, - const std::string &usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, - int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(clue_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache); +// Function to create a CLUEDataset. +std::shared_ptr CLUE(const std::vector &clue_files, const std::string &task, + const std::string &usage, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { + auto ds = std::make_shared(clue_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache); return ds; } -// Function to create a CocoNode. -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task, const bool &decode, const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache); +// Function to create a CocoDataset. +std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, + const std::string &task, const bool &decode, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache); return ds; } -// Function to create a CSVNode. -std::shared_ptr CSV(const std::vector &dataset_files, char field_delim, - const std::vector> &column_defaults, - const std::vector &column_names, int64_t num_samples, ShuffleMode shuffle, - int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, field_delim, column_defaults, column_names, num_samples, shuffle, - num_shards, shard_id, cache); +// Function to create a CSVDataset. 
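These leaf factories forward num_samples, shuffle, num_shards, and shard_id straight into the IR node; the shard arguments are checked later by the ValidateDatasetShardParams helper declared in dataset_node.h. A hedged, self-contained sketch of what such a check typically looks like follows; ValidateShardParams and its messages are illustrative only, not the MindSpore implementation.

#include <cstdint>
#include <iostream>
#include <string>

bool ValidateShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id) {
  if (num_shards < 1) {
    std::cerr << dataset_name << ": num_shards must be at least 1, got " << num_shards << "\n";
    return false;
  }
  // shard_id must select one of the num_shards partitions.
  if (shard_id < 0 || shard_id >= num_shards) {
    std::cerr << dataset_name << ": shard_id must be in [0, " << num_shards << "), got " << shard_id << "\n";
    return false;
  }
  return true;
}

int main() {
  std::cout << std::boolalpha << ValidateShardParams("CLUEDataset", 4, 3) << " "
            << ValidateShardParams("CLUEDataset", 4, 4) << "\n";  // true false
}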
+std::shared_ptr CSV(const std::vector &dataset_files, char field_delim, + const std::vector> &column_defaults, + const std::vector &column_names, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_files, field_delim, column_defaults, column_names, num_samples, + shuffle, num_shards, shard_id, cache); return ds; } -// Function to create a ImageFolderNode. -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, - const std::shared_ptr &sampler, - const std::set &extensions, - const std::map &class_indexing, - const std::shared_ptr &cache) { - // This arg exists in ImageFolderOp, but not externalized (in Python API). The default value is false. - bool recursive = false; - - // Create logical representation of ImageFolderNode. - auto ds = - std::make_shared(dataset_dir, decode, sampler, recursive, extensions, class_indexing, cache); +// Function to create a ImageFolderDataset. +std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, + const std::shared_ptr &sampler, + const std::set &extensions, + const std::map &class_indexing, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, decode, sampler, extensions, class_indexing, cache); return ds; } #ifndef ENABLE_ANDROID -// Function to create a ManifestNode. -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, - const std::shared_ptr &sampler, - const std::map &class_indexing, bool decode, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); +// Function to create a ManifestDataset. +std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, + const std::shared_ptr &sampler, + const std::map &class_indexing, bool decode, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); return ds; } -// Function to create a MindDataNode. -std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, - const std::shared_ptr &sampler, nlohmann::json padded_sample, - int64_t num_padded) { - auto ds = std::make_shared(dataset_file, columns_list, sampler, padded_sample, num_padded); +// Function to create a MindDataDataset. +std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, + const std::shared_ptr &sampler, nlohmann::json padded_sample, + int64_t num_padded) { + auto ds = std::make_shared(dataset_file, columns_list, sampler, padded_sample, num_padded); return ds; } -// Function to create a MindDataNode. -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list, - const std::shared_ptr &sampler, nlohmann::json padded_sample, - int64_t num_padded) { - auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); +// Function to create a MindDataDataset. +std::shared_ptr MindData(const std::vector &dataset_files, + const std::vector &columns_list, + const std::shared_ptr &sampler, nlohmann::json padded_sample, + int64_t num_padded) { + auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); return ds; } #endif -// Function to create a MnistNode. 
-std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); +// Function to create a MnistDataset. +std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, cache); return ds; } // Function to overload "+" operator to concat two datasets -std::shared_ptr operator+(const std::shared_ptr &datasets1, - const std::shared_ptr &datasets2) { - std::shared_ptr ds = std::make_shared(std::vector({datasets2, datasets1})); - - return ds; +std::shared_ptr operator+(const std::shared_ptr &datasets1, + const std::shared_ptr &datasets2) { + return std::make_shared(std::vector({datasets2, datasets1})); } -// Function to create a TextFileNode. -std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples, - ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); +// Function to create a TextFileDataset. +std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples, + ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); return ds; } #ifndef ENABLE_ANDROID -// Function to create a VOCNode. -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, - const std::shared_ptr &sampler, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache); +// Function to create a VOCDataset. +std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, + const std::map &class_indexing, bool decode, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache); return ds; } #endif // Function to create a ZipNode. 
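The operator+ overload above concatenates two datasets; note that the right-hand operand is placed first in the child vector. A self-contained sketch of the same idea, using hypothetical ToyDataset/ToyConcat types instead of the MindSpore classes:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct ToyDataset {
  explicit ToyDataset(std::string n) : name(std::move(n)) {}
  virtual ~ToyDataset() = default;
  std::string name;
};

struct ToyConcat : ToyDataset {
  explicit ToyConcat(std::vector<std::shared_ptr<ToyDataset>> inputs)
      : ToyDataset("concat"), children(std::move(inputs)) {}
  std::vector<std::shared_ptr<ToyDataset>> children;
};

// Mirrors the overload in the diff: the right-hand operand comes first in the child list.
std::shared_ptr<ToyConcat> operator+(const std::shared_ptr<ToyDataset> &lhs, const std::shared_ptr<ToyDataset> &rhs) {
  return std::make_shared<ToyConcat>(std::vector<std::shared_ptr<ToyDataset>>({rhs, lhs}));
}

int main() {
  auto a = std::make_shared<ToyDataset>("mnist");
  auto b = std::make_shared<ToyDataset>("cifar");
  auto c = a + b;
  std::cout << c->children[0]->name << ", " << c->children[1]->name << "\n";  // cifar, mnist
}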
-std::shared_ptr Zip(const std::vector> &datasets) { - auto ds = std::make_shared(datasets); - +std::shared_ptr Zip(const std::vector> &datasets) { + auto ds = std::make_shared(datasets); return ds; } @@ -508,170 +476,112 @@ std::shared_ptr Zip(const std::vector> &datase // (In alphabetical order) // Function to create a Batch dataset -std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remainder) { +BatchDataset::BatchDataset(std::shared_ptr input, int32_t batch_size, bool drop_remainder) { // Default values std::vector cols_to_map = {}; std::map>> pad_map; bool pad = false; - auto ds = std::make_shared(shared_from_this(), batch_size, drop_remainder, pad, cols_to_map, pad_map); - - return ds; + auto ds = std::make_shared(input->IRNode(), batch_size, drop_remainder, pad, cols_to_map, pad_map); + ir_node_ = std::static_pointer_cast(ds); } #ifndef ENABLE_ANDROID // Function to create a BucketBatchByLength dataset -std::shared_ptr Dataset::BucketBatchByLength( - const std::vector &column_names, const std::vector &bucket_boundaries, - const std::vector &bucket_batch_sizes, std::function element_length_function, +BucketBatchByLengthDataset::BucketBatchByLengthDataset( + std::shared_ptr input, const std::vector &column_names, + const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, + std::function element_length_function, const std::map>> &pad_info, bool pad_to_bucket_boundary, bool drop_remainder) { - auto ds = std::make_shared(shared_from_this(), column_names, bucket_boundaries, + auto ds = std::make_shared(input->IRNode(), column_names, bucket_boundaries, bucket_batch_sizes, element_length_function, pad_info, pad_to_bucket_boundary, drop_remainder); - return ds; -} - -// Function to create a SentencePieceVocab from dataset -std::shared_ptr Dataset::BuildSentencePieceVocab( - const std::vector &col_names, uint32_t vocab_size, float character_coverage, - SentencePieceModel model_type, const std::unordered_map ¶ms) { - auto vocab = std::make_shared(); - auto ds = std::make_shared(shared_from_this(), vocab, col_names, vocab_size, - character_coverage, model_type, params); - - // Run tree here to start building vocab - std::shared_ptr iter = ds->CreateIterator(); - if (iter == nullptr) { - MS_LOG(ERROR) << "Fail to run iterator in BuildSentencePieceVocab."; - return nullptr; - } - - // Finish building vocab by triggering GetNextRow - std::unordered_map> row; - if (!iter->GetNextRow(&row)) { - return nullptr; - } - - return vocab; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a Vocab from dataset -std::shared_ptr Dataset::BuildVocab(const std::vector &columns, - const std::pair &freq_range, int64_t top_k, - const std::vector &special_tokens, bool special_first) { - auto vocab = std::make_shared(); - auto ds = std::make_shared(shared_from_this(), vocab, columns, freq_range, top_k, special_tokens, - special_first); - - // Run tree here to starting building vocab - std::shared_ptr iter = ds->CreateIterator(); - if (iter == nullptr) { - MS_LOG(ERROR) << "Fail to run iterator in BuildVocab."; - return nullptr; - } - - // Finish building vocab by triggering GetNextRow - std::unordered_map> row; - if (!iter->GetNextRow(&row)) { - return nullptr; - } - - return vocab; -} #endif -// Function to create a Concat dataset -std::shared_ptr Dataset::Concat(const std::vector> &datasets) { - auto ds = std::make_shared(datasets); - ds->children.push_back(shared_from_this()); +ConcatDataset::ConcatDataset(const std::vector> &datasets) { + std::vector> all_datasets; 
+ (void)std::transform( + datasets.begin(), datasets.end(), std::back_inserter(all_datasets), + [](std::shared_ptr dataset) -> std::shared_ptr { return dataset->IRNode(); }); - return ds; + auto ds = std::make_shared(all_datasets); + + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a Map dataset. -std::shared_ptr Dataset::Map(std::vector> operations, - std::vector input_columns, std::vector output_columns, - const std::vector &project_columns, - const std::shared_ptr &cache) { +MapDataset::MapDataset(std::shared_ptr input, std::vector> operations, + std::vector input_columns, std::vector output_columns, + const std::vector &project_columns, const std::shared_ptr &cache) { auto ds = - std::make_shared(shared_from_this(), operations, input_columns, output_columns, project_columns, cache); + std::make_shared(input->IRNode(), operations, input_columns, output_columns, project_columns, cache); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a ProjectNode. -std::shared_ptr Dataset::Project(const std::vector &columns) { - auto ds = std::make_shared(shared_from_this(), columns); +ProjectDataset::ProjectDataset(std::shared_ptr input, const std::vector &columns) { + auto ds = std::make_shared(input->IRNode(), columns); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a RenameNode. -std::shared_ptr Dataset::Rename(const std::vector &input_columns, - const std::vector &output_columns) { - auto ds = std::make_shared(shared_from_this(), input_columns, output_columns); +RenameDataset::RenameDataset(std::shared_ptr input, const std::vector &input_columns, + const std::vector &output_columns) { + auto ds = std::make_shared(input->IRNode(), input_columns, output_columns); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create Repeat dataset. -std::shared_ptr Dataset::Repeat(int32_t count) { +RepeatDataset::RepeatDataset(std::shared_ptr input, int32_t count) { // Workaround for repeat == 1, do not inject repeat. if (count == 1) { - return shared_from_this(); + ir_node_ = input->IRNode(); + return; } - auto ds = std::make_shared(shared_from_this(), count); + auto ds = std::make_shared(input->IRNode(), count); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a ShuffleOp -std::shared_ptr Dataset::Shuffle(int32_t buffer_size) { +ShuffleDataset::ShuffleDataset(std::shared_ptr input, int32_t buffer_size) { // Pass in reshuffle_each_epoch with true - auto ds = std::make_shared(shared_from_this(), buffer_size, true); + auto ds = std::make_shared(input->IRNode(), buffer_size, true); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a SkipNode. -std::shared_ptr Dataset::Skip(int32_t count) { - auto ds = std::make_shared(shared_from_this(), count); +SkipDataset::SkipDataset(std::shared_ptr input, int32_t count) { + auto ds = std::make_shared(input->IRNode(), count); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a TakeNode. 
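RepeatDataset above (and TakeDataset just below, for count == -1) short-circuits the no-op case by adopting the input's IR node instead of inserting a new one. A minimal sketch of that pass-through, with ToyIRNode, ToyRepeatNode, and ToyDataset as hypothetical stand-ins:

#include <cassert>
#include <cstdint>
#include <memory>

struct ToyIRNode {
  virtual ~ToyIRNode() = default;
};

struct ToyRepeatNode : ToyIRNode {
  ToyRepeatNode(std::shared_ptr<ToyIRNode> child, int32_t count) : child(std::move(child)), count(count) {}
  std::shared_ptr<ToyIRNode> child;
  int32_t count;
};

struct ToyDataset {
  std::shared_ptr<ToyIRNode> ir_node;
};

ToyDataset Repeat(const ToyDataset &input, int32_t count) {
  // Workaround mirrored from the diff: repeating once changes nothing, so do not inject a RepeatNode.
  if (count == 1) return ToyDataset{input.ir_node};
  return ToyDataset{std::make_shared<ToyRepeatNode>(input.ir_node, count)};
}

int main() {
  ToyDataset src{std::make_shared<ToyIRNode>()};
  assert(Repeat(src, 1).ir_node == src.ir_node);  // identity: the same IR node is reused
  assert(Repeat(src, 3).ir_node != src.ir_node);  // real repeat: a new node is placed on top
  return 0;
}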
-std::shared_ptr Dataset::Take(int32_t count) { +TakeDataset::TakeDataset(std::shared_ptr input, int32_t count) { // If count is greater than the number of element in dataset or equal to -1, // all the element in dataset will be taken if (count == -1) { - return shared_from_this(); + ir_node_ = input->IRNode(); + return; } - auto ds = std::make_shared(shared_from_this(), count); + auto ds = std::make_shared(input->IRNode(), count); - return ds; + ir_node_ = std::static_pointer_cast(ds); } -// Function to create a Zip dataset -std::shared_ptr Dataset::Zip(const std::vector> &datasets) { - // Default values - auto ds = std::make_shared(datasets); - ds->children.push_back(shared_from_this()); +ZipDataset::ZipDataset(const std::vector> &datasets) { + std::vector> all_datasets; + (void)std::transform( + datasets.begin(), datasets.end(), std::back_inserter(all_datasets), + [](std::shared_ptr dataset) -> std::shared_ptr { return dataset->IRNode(); }); - return ds; -} + auto ds = std::make_shared(all_datasets); -Status Dataset::AddCacheOp(std::vector> *node_ops) { - if (cache_ != nullptr) { - RETURN_IF_NOT_OK(cache_->Build()); - std::shared_ptr cache_op; - RETURN_IF_NOT_OK(cache_->CreateCacheOp(num_workers_, &cache_op)); - node_ops->push_back(cache_op); - } - return Status::OK(); + ir_node_ = std::static_pointer_cast(ds); } int64_t Dataset::GetBatchSize() { @@ -685,7 +595,7 @@ int64_t Dataset::GetBatchSize() { return -1; } if (!tree_getters_->isInitialized()) { - rc = tree_getters_->Init(ds); + rc = tree_getters_->Init(ds->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "GetBatchSize: Initializing TreeGetters failed."; return -1; @@ -706,7 +616,7 @@ int64_t Dataset::GetRepeatCount() { return -1; } if (!tree_getters_->isInitialized()) { - rc = tree_getters_->Init(ds); + rc = tree_getters_->Init(ds->IRNode()); if (rc.IsError()) { MS_LOG(ERROR) << "GetRepeatCount: Initializing TreeGetters failed."; return -1; @@ -715,7 +625,77 @@ int64_t Dataset::GetRepeatCount() { rc = tree_getters_->GetRepeatCount(&repeat_count); return rc.IsError() ? 0 : repeat_count; } +std::shared_ptr Dataset::SetNumWorkers(int32_t num_workers) { + if (ir_node_ == nullptr || ir_node_->SetNumWorkers(num_workers) == nullptr) { + return nullptr; + } + return shared_from_this(); +} +#ifndef ENABLE_ANDROID +std::shared_ptr Dataset::BuildSentencePieceVocab( + const std::vector &col_names, uint32_t vocab_size, float character_coverage, + SentencePieceModel model_type, const std::unordered_map ¶ms) { + auto vocab = std::make_shared(); + auto ds = std::make_shared(IRNode(), vocab, col_names, vocab_size, character_coverage, + model_type, params); + std::unique_ptr runtime_context = std::make_unique(); + Status rc = runtime_context->Init(); + if (rc.IsError()) { + MS_LOG(ERROR) << "Failed to init runtime context. Error status: " << rc; + return nullptr; + } + + auto consumer = std::make_unique(); + BuildVocabConsumer *bv_consumer = consumer.get(); + rc = consumer->Init(ds); + if (rc.IsError()) { + MS_LOG(ERROR) << "BuildVocab: Failed to init. Error status: " << rc; + return nullptr; + } + runtime_context->AssignConsumer(std::move(consumer)); + + // Run tree here to starting building vocab + rc = bv_consumer->Start(); + if (rc.IsError()) { + MS_LOG(ERROR) << "BuildVocab: Failed to start. 
Error status: " << rc; + return nullptr; + } + return vocab; +} + +std::shared_ptr Dataset::BuildVocab(const std::vector &columns, + const std::pair &freq_range, int64_t top_k, + const std::vector &special_tokens, bool special_first) { + auto vocab = std::make_shared(); + auto ds = + std::make_shared(IRNode(), vocab, columns, freq_range, top_k, special_tokens, special_first); + + std::unique_ptr runtime_context = std::make_unique(); + Status rc = runtime_context->Init(); + if (rc.IsError()) { + MS_LOG(ERROR) << "Failed to init runtime context. Error status: " << rc; + return nullptr; + } + + auto consumer = std::make_unique(); + BuildVocabConsumer *bv_consumer = consumer.get(); + rc = consumer->Init(ds); + if (rc.IsError()) { + MS_LOG(ERROR) << "BuildVocab: Failed to init. Error status: " << rc; + return nullptr; + } + runtime_context->AssignConsumer(std::move(consumer)); + + // Run tree here to starting building vocab + rc = bv_consumer->Start(); + if (rc.IsError()) { + MS_LOG(ERROR) << "BuildVocab: Failed to start. Error status: " << rc; + return nullptr; + } + return vocab; +} +#endif SchemaObj::SchemaObj(const std::string &schema_file) : schema_file_(schema_file), num_rows_(0), dataset_type_("") {} // SchemaObj init function @@ -1046,6 +1026,136 @@ std::shared_ptr CreateDatasetCache(session_id_type id, uint64_t me } #endif +AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names, bool decode, + const std::shared_ptr &sampler, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler, cache); + ir_node_ = std::static_pointer_cast(ds); +} +CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, bool decode, + const std::set &extensions, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, decode, extensions, cache); + ir_node_ = std::static_pointer_cast(ds); +} +Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, cache); + ir_node_ = std::static_pointer_cast(ds); +} +Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, cache); + ir_node_ = std::static_pointer_cast(ds); +} +CLUEDataset::CLUEDataset(const std::vector &dataset_files, const std::string &task, + const std::string &usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, + int32_t shard_id, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache); + ir_node_ = std::static_pointer_cast(ds); +} +CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, + const bool &decode, const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache); + ir_node_ = std::static_pointer_cast(ds); +} +CSVDataset::CSVDataset(const std::vector &dataset_files, char field_delim, + const std::vector> &column_defaults, + const std::vector &column_names, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { + 
auto ds = std::make_shared(dataset_files, field_delim, column_defaults, column_names, num_samples, shuffle, + num_shards, shard_id, cache); + ir_node_ = std::static_pointer_cast(ds); +} +ImageFolderDataset::ImageFolderDataset(const std::string &dataset_dir, bool decode, + const std::shared_ptr &sampler, + const std::set &extensions, + const std::map &class_indexing, + const std::shared_ptr &cache) { + // This arg exists in ImageFolderOp, but not externalized (in Python API). The default value is false. + bool recursive = false; + + // Create logical representation of ImageFolderDataset. + auto ds = + std::make_shared(dataset_dir, decode, sampler, recursive, extensions, class_indexing, cache); + ir_node_ = std::static_pointer_cast(ds); +} + +#ifndef ENABLE_ANDROID +ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage, + const std::shared_ptr &sampler, + const std::map &class_indexing, bool decode, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); + ir_node_ = std::static_pointer_cast(ds); +} +MindDataDataset::MindDataDataset(const std::string &dataset_file, const std::vector &columns_list, + const std::shared_ptr &sampler, nlohmann::json padded_sample, + int64_t num_padded) { + auto ds = std::make_shared(dataset_file, columns_list, sampler, padded_sample, num_padded); + ir_node_ = std::static_pointer_cast(ds); +} +MindDataDataset::MindDataDataset(const std::vector &dataset_files, + const std::vector &columns_list, + const std::shared_ptr &sampler, nlohmann::json padded_sample, + int64_t num_padded) { + auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); + ir_node_ = std::static_pointer_cast(ds); +} +#endif +MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, usage, sampler, cache); + ir_node_ = std::static_pointer_cast(ds); +} +TextFileDataset::TextFileDataset(const std::vector &dataset_files, int64_t num_samples, + ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, + const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); + ir_node_ = std::static_pointer_cast(ds); +} +#ifndef ENABLE_ANDROID +VOCDataset::VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &usage, + const std::map &class_indexing, bool decode, + const std::shared_ptr &sampler, const std::shared_ptr &cache) { + auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache); + ir_node_ = std::static_pointer_cast(ds); +} +#endif +RandomDataDataset::RandomDataDataset(const int32_t &total_rows, std::shared_ptr schema, + const std::vector &columns_list, + const std::shared_ptr &sampler, std::shared_ptr cache) { + auto ds = + std::make_shared(total_rows, std::move(schema), std::move(columns_list), std::move(sampler), cache); + ir_node_ = std::static_pointer_cast(ds); +} +RandomDataDataset::RandomDataDataset(const int32_t &total_rows, std::string schema_path, + const std::vector &columns_list, + const std::shared_ptr &sampler, std::shared_ptr cache) { + auto ds = std::make_shared(total_rows, std::move(schema_path), std::move(columns_list), + std::move(sampler), cache); + ir_node_ = std::static_pointer_cast(ds); +} +#ifndef ENABLE_ANDROID +TFRecordDataset::TFRecordDataset(const std::vector 
&dataset_files, std::string schema, + const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, bool shard_equal_rows, + std::shared_ptr cache) { + auto ds = std::make_shared(dataset_files, schema, columns_list, num_samples, shuffle, num_shards, + shard_id, shard_equal_rows, cache); + ir_node_ = std::static_pointer_cast(ds); +} +TFRecordDataset::TFRecordDataset(const std::vector &dataset_files, std::shared_ptr schema, + const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, bool shard_equal_rows, + std::shared_ptr cache) { + auto ds = std::make_shared(dataset_files, schema, columns_list, num_samples, shuffle, num_shards, + shard_id, shard_equal_rows, cache); + ir_node_ = std::static_pointer_cast(ds); +} +#endif std::shared_ptr SelectSampler(int64_t num_samples, bool shuffle, int32_t num_shards, int32_t shard_id) { if (shuffle) { if (num_shards > 1) { @@ -1062,7 +1172,6 @@ std::shared_ptr SelectSampler(int64_t num_samples, bool shuffle, int // If shuffle disabled, sharding disabled, use sequential sampler return SequentialSampler(0, num_samples); } - } // namespace api } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc index 39082e626d..1e8e74ca1a 100644 --- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc @@ -53,7 +53,7 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr ds) { RETURN_IF_NOT_OK(runtime_context->Init()); auto consumer = std::make_unique(); consumer_ = consumer.get(); - RETURN_IF_NOT_OK(consumer->Init(ds)); + RETURN_IF_NOT_OK(consumer->Init(ds->IRNode())); runtime_context->AssignConsumer(std::move(consumer)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt index 4aa54a53cd..c97bc098bb 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/CMakeLists.txt @@ -11,7 +11,7 @@ endif () file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine OBJECT +set(SRC_FILES_LIST execution_tree.cc data_buffer.cc data_schema.cc @@ -20,10 +20,19 @@ add_library(engine OBJECT runtime_context.cc consumers/tree_consumer.cc ) +if (ENABLE_PYTHON) + set(SRC_FILES_LIST + ${SRC_FILES_LIST} + python_runtime_context.cc + consumers/python_tree_consumer.cc + ) +endif () + +add_library(engine OBJECT ${SRC_FILES_LIST}) if (ENABLE_PYTHON) - target_include_directories(engine PRIVATE ${pybind11_INCLUDE_DIRS}) -endif() + target_include_directories(engine PRIVATE ${pybind11_INCLUDE_DIRS}) +endif () add_dependencies(engine engine-datasetops engine-datasetops-source engine-opt engine-gnn engine-perf engine-cache-client engine-datasetops-mapop) diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc new file mode 100644 index 0000000000..bca21d61cb --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "minddata/dataset/engine/consumers/python_tree_consumer.h" + +namespace mindspore::dataset { + +Status PythonIteratorConsumer::GetNextAsList(py::list *out) { + std::vector row; + { + py::gil_scoped_release gil_release; + RETURN_IF_NOT_OK(GetNextAsVector(&row)); + } + for (auto el : row) { + (*out).append(el); + } + return Status::OK(); +} +Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) { + std::unordered_map row; + { + py::gil_scoped_release gil_release; + RETURN_IF_NOT_OK(GetNextAsMap(&row)); + } + for (auto el : row) { + (*out)[common::SafeCStr(el.first)] = el.second; + } + return Status::OK(); +} +} // namespace mindspore::dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.h b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.h index 0359a43c23..19e98f570e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.h +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.h @@ -26,24 +26,21 @@ namespace mindspore::dataset { /// Consumer that iterates over the dataset and returns the rows one by one as a python list or a dict -class PythonIterator : public IteratorConsumer { - /// Constructor + +class PythonIteratorConsumer : public IteratorConsumer { + public: + /// Constructor which will call the base class default constructor. /// \param num_epochs number of epochs. Default to -1 (infinite epochs). 
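GetNextAsList/GetNextAsDict above release the GIL while the C++ pipeline produces a row and re-acquire it before building Python objects. A small self-contained pybind11 sketch of that discipline, assuming a hypothetical FetchRow in place of the tree_adapter_ fetch:

#include <pybind11/embed.h>
#include <string>
#include <vector>

namespace py = pybind11;

std::vector<std::string> FetchRow() {  // pretend this blocks inside the C++ pipeline
  return {"col1-value", "col2-value"};
}

py::list NextRowAsList() {
  std::vector<std::string> row;
  {
    py::gil_scoped_release release;  // other Python threads may run while we wait for a row
    row = FetchRow();
  }  // GIL re-acquired here
  py::list out;
  for (const auto &el : row) out.append(el);  // safe: we hold the GIL again
  return out;
}

int main() {
  py::scoped_interpreter guard;  // start an embedded Python interpreter
  py::print(NextRowAsList());
}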
-  explicit PythonIterator(int32_t num_epochs = -1) : IteratorConsumer(num_epochs) {}
+  explicit PythonIteratorConsumer(int32_t num_epochs = -1) : IteratorConsumer(num_epochs) {}
+  /// Returns the next row in a vector format
+  /// \param[out] out std::vector of Tensors
+  /// \return Status error code
+  Status GetNextAsList(py::list *out);
-  /// Get the next row as a python dict
-  /// \param[out] output python dict
-  /// \return Status error code
-  Status GetNextAsMap(py::dict *output) {
-    return Status(StatusCode::kNotImplementedYet, __LINE__, __FILE__, "Method is not implemented yet.");
-  }
-  /// Get the next row as a python dict
-  /// \param[out] output python dict
-  /// \return Status error code
-  Status GetNextAsList(py::list *output) {
-    return Status(StatusCode::kNotImplementedYet, __LINE__, __FILE__, "Method is not implemented yet.");
-  }
+  /// Returns the next row as a map
+  /// \param[out] out std::map of string to Tensor
+  /// \return Status error code
+  Status GetNextAsDict(py::dict *out);
 };
-
 }  // namespace mindspore::dataset
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CONSUMERS_PYTHON_TREE_CONSUMER_H_
diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
index c65a749103..5b62deefd2 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
@@ -34,10 +34,11 @@ namespace mindspore::dataset {
 // TreeConsumer
 TreeConsumer::TreeConsumer() { tree_adapter_ = std::make_unique<TreeAdapter>(); }
-Status TreeConsumer::Init(std::shared_ptr<Dataset> d) { return tree_adapter_->BuildAndPrepare(std::move(d)); }
+Status TreeConsumer::Init(std::shared_ptr<DatasetNode> d) { return tree_adapter_->BuildAndPrepare(std::move(d)); }
+Status TreeConsumer::Terminate() { return tree_adapter_->AllTasks()->DoServiceStop(); }
 // IteratorConsumer
-Status IteratorConsumer::Init(std::shared_ptr<Dataset> d) {
+Status IteratorConsumer::Init(std::shared_ptr<DatasetNode> d) {
   return tree_adapter_->BuildAndPrepare(std::move(d), num_epochs_);
 }
@@ -73,7 +74,7 @@ Status IteratorConsumer::GetNextAsMap(std::unordered_map
 }
 // ToDevice
-Status ToDevice::Init(std::shared_ptr<Dataset> d) {
+Status ToDevice::Init(std::shared_ptr<DatasetNode> d) {
   return tree_adapter_->BuildAndPrepare(std::move(d), num_epochs_);
 }
@@ -384,7 +385,7 @@ TreeGetters::TreeGetters() : dataset_size_(-1), init_flag_(false), row_flag_(fal
   tree_adapter_ = std::make_unique<TreeAdapter>();
 }
-Status TreeGetters::Init(std::shared_ptr<Dataset> d) {
+Status TreeGetters::Init(std::shared_ptr<DatasetNode> d) {
   Status s = tree_adapter_->BuildAndPrepare(std::move(d));
   if (!s.IsError()) {
     init_flag_ = true;
@@ -463,4 +464,15 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) {
   RETURN_IF_NOT_OK(root->GetNumClasses(num_classes));
   return Status::OK();
 }
+Status BuildVocabConsumer::Init(std::shared_ptr<DatasetNode> d) {
+  return tree_adapter_->BuildAndPrepare(std::move(d), 1);
+}
+Status BuildVocabConsumer::Start() {
+  // Getting one row triggers building the vocab
+  TensorRow row;
+  RETURN_IF_NOT_OK(tree_adapter_->GetNext(&row));
+  // The returned row should be an EOE, which is an empty row
+  CHECK_FAIL_RETURN_UNEXPECTED(row.empty(), "The fetched row from BuildVocab should be an EOE.");
+  return Status::OK();
+}
 }  // namespace mindspore::dataset
diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h
index 1dcd314f9f..7e947f3909 100644
---
a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h @@ -22,14 +22,16 @@ #include #include #include + #include "minddata/dataset/engine/tree_adapter.h" +#include "minddata/dataset/text/vocab.h" namespace mindspore::dataset { // Forward declare class TreeAdapter; namespace api { -class Dataset; +class DatasetNode; } /// A base class for tree consumers which would fetch rows from the tree pipeline @@ -40,7 +42,9 @@ class TreeConsumer { /// Initializes the consumer, this involves constructing and preparing the tree. /// \param d The dataset node that represent the root of the IR tree. /// \return Status error code. - virtual Status Init(std::shared_ptr d); + virtual Status Init(std::shared_ptr d); + + Status Terminate(); protected: /// The class owns the tree_adapter that handles execution tree operations. @@ -57,7 +61,7 @@ class IteratorConsumer : public TreeConsumer { /// \param num_epochs number of epochs. Default to -1 (infinite epochs). explicit IteratorConsumer(int32_t num_epochs = -1) : TreeConsumer(), num_epochs_(num_epochs) {} - Status Init(std::shared_ptr d) override; + Status Init(std::shared_ptr d) override; /// Returns the next row in a vector format /// \param[out] out std::vector of Tensors @@ -126,10 +130,10 @@ class SaveToDisk : public TreeConsumer { /// Consumer that iterates over the dataset and send it to a device class ToDevice : public TreeConsumer { public: - ToDevice(std::string device_type, bool send_epoch_end, int32_t num_epochs = -1) - : TreeConsumer(), device_type_(device_type), send_epoch_end_(send_epoch_end), num_epochs_(num_epochs) {} + explicit ToDevice(bool send_epoch_end, int32_t num_epochs = -1) + : TreeConsumer(), send_epoch_end_(send_epoch_end), num_epochs_(num_epochs) {} - Status Init(std::shared_ptr d) override; + Status Init(std::shared_ptr d) override; /// Send the data to device /// \return Status error code @@ -158,7 +162,7 @@ class ToDevice : public TreeConsumer { class TreeGetters : public TreeConsumer { public: TreeGetters(); - Status Init(std::shared_ptr d) override; + Status Init(std::shared_ptr d) override; Status GetDatasetSize(int64_t *size); Status GetOutputTypes(std::vector *types); Status GetOutputShapes(std::vector *shapes); @@ -176,5 +180,23 @@ class TreeGetters : public TreeConsumer { bool row_flag_; // indicate whether the first row has been stored in row_ }; +class BuildVocabConsumer : public TreeConsumer { + public: + /// BuildVocabConsumer Constructor which will call the base class default constructor. + BuildVocabConsumer() = default; + + Status Init(std::shared_ptr d) override; + + /// Save the given dataset to MindRecord format on disk. 
This is a blocking method (i.e., after returning, all rows + /// would be written to disk) + /// \return Status error code + Status Start(); + + protected: + /// Method to return the name of the consumer + /// \return string + std::string Name() override { return "BuildVocab"; } +}; + } // namespace mindspore::dataset #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CONSUMERS_TREE_CONSUMER_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/CMakeLists.txt index 2c6f8e133d..a2a2d18595 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/CMakeLists.txt @@ -3,6 +3,7 @@ set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE add_subdirectory(source) set(DATASET_ENGINE_IR_DATASETOPS_SRC_FILES + dataset_node.cc batch_node.cc bucket_batch_by_length_node.cc build_sentence_piece_vocab_node.cc diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc index edcf95cde7..7b8fc14d5f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc @@ -28,7 +28,7 @@ namespace mindspore { namespace dataset { namespace api { -BatchNode::BatchNode(std::shared_ptr child, int32_t batch_size, bool drop_remainder, bool pad, +BatchNode::BatchNode(std::shared_ptr child, int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, std::map>> pad_map) : batch_size_(batch_size), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h index a2462f49d4..708ad6d969 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h @@ -23,16 +23,16 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class BatchNode : public Dataset { +class BatchNode : public DatasetNode { public: /// \brief Constructor - BatchNode(std::shared_ptr child, int32_t batch_size, bool drop_remainder, bool pad, + BatchNode(std::shared_ptr child, int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, std::map>> pad_map); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.cc index 49bae0ac60..81be43eac1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.cc @@ -29,7 +29,7 @@ namespace mindspore { namespace dataset { namespace api { BucketBatchByLengthNode::BucketBatchByLengthNode( - std::shared_ptr child, const std::vector &column_names, + std::shared_ptr child, const std::vector &column_names, const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, std::function element_length_function, const std::map>> &pad_info, bool pad_to_bucket_boundary, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.h index 
64f861721e..e9f395c363 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.h @@ -23,15 +23,15 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class BucketBatchByLengthNode : public Dataset { +class BucketBatchByLengthNode : public DatasetNode { public: /// \brief Constructor - BucketBatchByLengthNode(std::shared_ptr child, const std::vector &column_names, + BucketBatchByLengthNode(std::shared_ptr child, const std::vector &column_names, const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, std::function element_length_function = nullptr, const std::map>> &pad_info = {}, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.cc index 5704617f9a..1dda6410cd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.cc @@ -28,7 +28,7 @@ namespace mindspore { namespace dataset { namespace api { -BuildSentenceVocabNode::BuildSentenceVocabNode(std::shared_ptr child, +BuildSentenceVocabNode::BuildSentenceVocabNode(std::shared_ptr child, std::shared_ptr vocab, const std::vector &col_names, uint32_t vocab_size, float character_coverage, SentencePieceModel model_type, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h index 868daaae5f..10eb7e99d0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h @@ -29,10 +29,10 @@ namespace mindspore { namespace dataset { namespace api { -class BuildSentenceVocabNode : public Dataset { +class BuildSentenceVocabNode : public DatasetNode { public: /// \brief Constructor - BuildSentenceVocabNode(std::shared_ptr child, std::shared_ptr vocab, + BuildSentenceVocabNode(std::shared_ptr child, std::shared_ptr vocab, const std::vector &col_names, uint32_t vocab_size, float character_coverage, SentencePieceModel model_type, const std::unordered_map ¶ms); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.cc index 6fdb5d7c3a..bad52db138 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.cc @@ -28,7 +28,7 @@ namespace mindspore { namespace dataset { namespace api { -BuildVocabNode::BuildVocabNode(std::shared_ptr child, std::shared_ptr vocab, +BuildVocabNode::BuildVocabNode(std::shared_ptr child, std::shared_ptr vocab, const std::vector &columns, const std::pair &freq_range, int64_t top_k, const std::vector &special_tokens, bool special_first) : vocab_(vocab), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.h index a7a20c3897..9f0de2d133 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_vocab_node.h @@ -22,17 +22,17 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class BuildVocabNode : public Dataset { +class BuildVocabNode : public DatasetNode { public: /// \brief Constructor - BuildVocabNode(std::shared_ptr child, std::shared_ptr vocab, const std::vector &columns, - const std::pair &freq_range, int64_t top_k, + BuildVocabNode(std::shared_ptr child, std::shared_ptr vocab, + const std::vector &columns, const std::pair &freq_range, int64_t top_k, const std::vector &special_tokens, bool special_first); /// \brief Destructor diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc index a1a506261d..36ce94a732 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc @@ -27,18 +27,16 @@ namespace mindspore { namespace dataset { namespace api { // Function to build ConcatOp -ConcatNode::ConcatNode(const std::vector> &datasets) : datasets_(datasets) { - this->children = datasets_; -} +ConcatNode::ConcatNode(const std::vector> &datasets) { this->children = datasets; } Status ConcatNode::ValidateParams() { - if (datasets_.empty()) { + if (children.size() < 2) { std::string err_msg = "ConcatNode: concatenated datasets are not specified."; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } - if (find(datasets_.begin(), datasets_.end(), nullptr) != datasets_.end()) { + if (find(children.begin(), children.end(), nullptr) != children.end()) { std::string err_msg = "ConcatNode: concatenated datasets should not be null."; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.h index f0b9fcae94..61822b1283 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.h @@ -21,16 +21,16 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class ConcatNode : public Dataset { +class ConcatNode : public DatasetNode { public: /// \brief Constructor - explicit ConcatNode(const std::vector> &datasets); + explicit ConcatNode(const std::vector> &datasets); /// \brief Destructor ~ConcatNode() = default; @@ -42,9 +42,6 @@ class ConcatNode : public Dataset { /// \brief Parameters validation /// \return Status Status::OK() if all the parameters are valid Status ValidateParams() override; - - private: - std::vector> datasets_; }; } // namespace api diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc new file mode 100644 index 0000000000..895fc5fd03 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc @@ -0,0 +1,65 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" + +#include + +namespace mindspore { +namespace dataset { +namespace api { + +Status DatasetNode::AddCacheOp(std::vector> *node_ops) { + if (cache_ != nullptr) { + RETURN_IF_NOT_OK(cache_->Build()); + std::shared_ptr cache_op; + RETURN_IF_NOT_OK(cache_->CreateCacheOp(num_workers_, &cache_op)); + node_ops->push_back(cache_op); + } + return Status::OK(); +} +// Constructor to initialize the cache +DatasetNode::DatasetNode(const std::shared_ptr &dataset_cache) : DatasetNode() { cache_ = dataset_cache; } + +std::shared_ptr DatasetNode::SetNumWorkers(int32_t num_workers) { +#if !defined(_WIN32) && !defined(_WIN64) +#ifndef ENABLE_ANDROID + int32_t cpu_count = sysconf(_SC_NPROCESSORS_CONF); + if (cpu_count < 0 || cpu_count > INT32_MAX) { + MS_LOG(ERROR) << "Error determining current CPU: " << cpu_count; + return nullptr; + } + if (num_workers < 1 || num_workers > cpu_count) { + MS_LOG(ERROR) << "num_workers exceeds the boundary between 1 and " << cpu_count; + return nullptr; + } +#endif +#endif + num_workers_ = num_workers; + return shared_from_this(); +} +DatasetNode::DatasetNode() { + // Fetch some default value from config manager + std::shared_ptr cfg = GlobalContext::config_manager(); + num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + connector_que_size_ = cfg->op_connector_size(); + worker_connector_size_ = cfg->worker_connector_size(); +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h new file mode 100644 index 0000000000..89766d31a7 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h @@ -0,0 +1,126 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
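SetNumWorkers above rejects worker counts outside [1, number of configured CPUs] on POSIX builds. A compilable sketch of that check as a free function (ValidateNumWorkers is illustrative, not the MindSpore API):

#include <unistd.h>
#include <cstdint>
#include <iostream>

bool ValidateNumWorkers(int32_t num_workers) {
  long cpu_count = sysconf(_SC_NPROCESSORS_CONF);  // number of configured CPUs
  if (cpu_count < 0 || cpu_count > INT32_MAX) {
    std::cerr << "Error determining current CPU count: " << cpu_count << "\n";
    return false;
  }
  if (num_workers < 1 || num_workers > cpu_count) {
    std::cerr << "num_workers must be in the range [1, " << cpu_count << "]\n";
    return false;
  }
  return true;
}

int main() {
  std::cout << std::boolalpha << ValidateNumWorkers(0) << " " << ValidateNumWorkers(1) << "\n";  // false true
}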
+ */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_DATASET_NODE_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_DATASET_NODE_H_ + +#include +#include +#include +#include +#include +#include + +#include "minddata/dataset/include/datasets.h" + +namespace mindspore { +namespace dataset { +namespace api { + +class Dataset; +class SamplerObj; + +#define RETURN_EMPTY_IF_ERROR(_s) \ + do { \ + Status __rc = (_s); \ + if (__rc.IsError()) { \ + MS_LOG(ERROR) << __rc; \ + return {}; \ + } \ + } while (false) + +Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, + int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr *shuffle_op); + +// Helper function to validate dataset files parameter +Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files); + +// Helper function to validate dataset num_shards and shard_id parameters +Status ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id); + +// Helper function to validate dataset sampler parameter +Status ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr &sampler); + +Status ValidateStringValue(const std::string &dataset_name, const std::string &str, + const std::unordered_set &valid_strings); + +// Helper function to validate dataset input/output column parameterCD - +Status ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param, + const std::vector &columns); + +// Helper function to validate dataset directory parameter +Status ValidateDatasetDirParam(const std::string &dataset_name, std::string dataset_dir); + +/// \brief Function to create a sampler for non-mappable dataset (to be used by cache op later). +/// \notes Non-mappable dataset does not directly support a sampler. It has provided sampling arguments (shuffle, +/// num_samples, num_shards, shard_id) and it DOES support sampling if somewhere above it in the pipeline contains +/// a cache. If there is no cache above it, then the sampler is not used. +/// \param[in] num_samples The number of samples to be included in the dataset. +/// \param[in] shuffle If true, the indices are shuffled. +/// \param[in] num_shards Number of shards to divide the dataset into. +/// \param[in] shard_id Shard ID of the current shard within num_shards. +/// \return Shared pointer to the current Sampler. 
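The SelectSampler doc comment above describes a small decision table, and the implementation at the end of datasets.cc follows it: shuffle plus shards yields a shuffled distributed sampler, shuffle alone a random sampler, shards alone a sequential distributed sampler, otherwise a sequential sampler. A hedged restatement of that table, with Make* stubs standing in for the real sampler factories:

#include <cstdint>
#include <iostream>
#include <memory>
#include <string>

struct SamplerSketch { std::string kind; };

std::shared_ptr<SamplerSketch> MakeDistributed(int32_t num_shards, int32_t shard_id, bool shuffle, int64_t n) {
  return std::make_shared<SamplerSketch>(SamplerSketch{shuffle ? "distributed-shuffled" : "distributed-sequential"});
}
std::shared_ptr<SamplerSketch> MakeRandom(int64_t n) { return std::make_shared<SamplerSketch>(SamplerSketch{"random"}); }
std::shared_ptr<SamplerSketch> MakeSequential(int64_t n) { return std::make_shared<SamplerSketch>(SamplerSketch{"sequential"}); }

std::shared_ptr<SamplerSketch> SelectSamplerSketch(int64_t num_samples, bool shuffle, int32_t num_shards, int32_t shard_id) {
  if (shuffle) {
    if (num_shards > 1) return MakeDistributed(num_shards, shard_id, /*shuffle=*/true, num_samples);
    return MakeRandom(num_samples);
  }
  if (num_shards > 1) return MakeDistributed(num_shards, shard_id, /*shuffle=*/false, num_samples);
  return MakeSequential(num_samples);
}

int main() {
  std::cout << SelectSamplerSketch(100, true, 4, 0)->kind << "\n";   // distributed-shuffled
  std::cout << SelectSamplerSketch(100, false, 1, 0)->kind << "\n";  // sequential
}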
+std::shared_ptr SelectSampler(int64_t num_samples, bool shuffle, int32_t num_shards, int32_t shard_id); + +class DatasetNode : public std::enable_shared_from_this { + public: + /// \brief Constructor + DatasetNode(); + + /// \brief Constructor that initializes the cache + /// \param dataset_cache DatasetCache + explicit DatasetNode(const std::shared_ptr &dataset_cache); + + /// \brief Destructor + ~DatasetNode() = default; + + /// \brief Pure virtual function to convert a DatasetNode class into a runtime dataset object + /// \return The list of shared pointers to the newly created DatasetOps + virtual std::vector> Build() = 0; + + /// \brief Pure virtual function for derived class to implement parameters validation + /// \return Status Status::OK() if all the parameters are valid + virtual Status ValidateParams() = 0; + + const std::vector> Children() const { return children; } + + /// \brief Pure virtual function for derived class to get the shard id of specific node + /// \return Status Status::OK() if get shard id successfully + virtual Status GetShardId(int32_t *shard_id) { + return Status(StatusCode::kNotImplementedYet, __LINE__, __FILE__, "Method is not implemented yet."); + } + + /// \brief Setter function for runtime number of workers + /// \param[in] num_workers The number of threads in this operator + /// \return Shared pointer to the original object + std::shared_ptr SetNumWorkers(int32_t num_workers); + + protected: + std::vector> children; + std::shared_ptr parent; + std::shared_ptr cache_; + Status AddCacheOp(std::vector> *node_ops); + + int32_t num_workers_; + int32_t rows_per_buffer_; + int32_t connector_que_size_; + int32_t worker_connector_size_; +}; + +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_DATASET_NODE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc index cd27a48936..c7dc98a9e5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc @@ -28,14 +28,14 @@ namespace mindspore { namespace dataset { namespace api { -MapNode::MapNode(std::shared_ptr child, std::vector> operations, +MapNode::MapNode(std::shared_ptr child, std::vector> operations, std::vector input_columns, std::vector output_columns, const std::vector &project_columns, std::shared_ptr cache) : operations_(operations), input_columns_(input_columns), output_columns_(output_columns), project_columns_(project_columns), - Dataset(std::move(cache)) { + DatasetNode(std::move(cache)) { this->children.push_back(child); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h index 0eec6981b7..9ee5d1b8b8 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h @@ -21,15 +21,15 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class MapNode : public Dataset { +class MapNode : public DatasetNode { public: /// \brief Constructor - MapNode(std::shared_ptr child, std::vector> operations, + MapNode(std::shared_ptr child, std::vector> operations, std::vector input_columns = {}, std::vector output_columns = {}, const 
std::vector &columns = {}, std::shared_ptr cache = nullptr); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc index 10573671d7..9fa7234c58 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc @@ -28,7 +28,8 @@ namespace dataset { namespace api { // Function to build ProjectOp -ProjectNode::ProjectNode(std::shared_ptr child, const std::vector &columns) : columns_(columns) { +ProjectNode::ProjectNode(std::shared_ptr child, const std::vector &columns) + : columns_(columns) { this->children.push_back(child); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h index 5faf88a986..7a6fb52869 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h @@ -21,17 +21,17 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class ProjectNode : public Dataset { +class ProjectNode : public DatasetNode { public: /// \brief Constructor - explicit ProjectNode(std::shared_ptr child, const std::vector &columns); + explicit ProjectNode(std::shared_ptr child, const std::vector &columns); /// \brief Destructor ~ProjectNode() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc index 284959f3ad..4d29b8e030 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc @@ -27,7 +27,7 @@ namespace mindspore { namespace dataset { namespace api { // Function to build RenameOp -RenameNode::RenameNode(std::shared_ptr child, const std::vector &input_columns, +RenameNode::RenameNode(std::shared_ptr child, const std::vector &input_columns, const std::vector &output_columns) : input_columns_(input_columns), output_columns_(output_columns) { this->children.push_back(child); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h index 11d8975056..379c74beae 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h @@ -21,17 +21,17 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class RenameNode : public Dataset { +class RenameNode : public DatasetNode { public: /// \brief Constructor - explicit RenameNode(std::shared_ptr child, const std::vector &input_columns, + explicit RenameNode(std::shared_ptr child, const std::vector &input_columns, const std::vector &output_columns); /// \brief Destructor diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc index 40043e6117..071d92c816 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc @@ -27,7 +27,7 @@ 
namespace mindspore { namespace dataset { namespace api { -RepeatNode::RepeatNode(std::shared_ptr child, int32_t count) : repeat_count_(count) { +RepeatNode::RepeatNode(std::shared_ptr child, int32_t count) : repeat_count_(count) { this->children.push_back(child); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h index d893da637a..3385b33db0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h @@ -23,17 +23,17 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class RepeatNode : public Dataset { +class RepeatNode : public DatasetNode { public: /// \brief Constructor - explicit RepeatNode(std::shared_ptr child, int32_t count); + explicit RepeatNode(std::shared_ptr child, int32_t count); /// \brief Destructor ~RepeatNode() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc index 6bca72c785..a82f3367fe 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc @@ -28,7 +28,7 @@ namespace dataset { namespace api { // Constructor for ShuffleNode -ShuffleNode::ShuffleNode(std::shared_ptr child, int32_t shuffle_size, bool reset_every_epoch) +ShuffleNode::ShuffleNode(std::shared_ptr child, int32_t shuffle_size, bool reset_every_epoch) : shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) { this->children.push_back(child); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h index 07a1503f24..0274cf8b69 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h @@ -23,16 +23,16 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class ShuffleNode : public Dataset { +class ShuffleNode : public DatasetNode { public: - ShuffleNode(std::shared_ptr child, int32_t shuffle_size, bool reset_every_epoch); + ShuffleNode(std::shared_ptr child, int32_t shuffle_size, bool reset_every_epoch); ~ShuffleNode() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc index 5fa20dae1f..c2e5618106 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc @@ -28,7 +28,7 @@ namespace dataset { namespace api { // Constructor for SkipNode -SkipNode::SkipNode(std::shared_ptr child, int32_t count) : skip_count_(count) { +SkipNode::SkipNode(std::shared_ptr child, int32_t count) : skip_count_(count) { this->children.push_back(child); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h index 81c6d26311..438eb54f99 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h +++ 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h @@ -21,16 +21,16 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class SkipNode : public Dataset { +class SkipNode : public DatasetNode { public: /// \brief Constructor - explicit SkipNode(std::shared_ptr child, int32_t count); + explicit SkipNode(std::shared_ptr child, int32_t count); /// \brief Destructor ~SkipNode() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc index 5bcab68789..1e9cdd9c4d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc @@ -32,7 +32,7 @@ namespace api { AlbumNode::AlbumNode(const std::string &dataset_dir, const std::string &data_schema, const std::vector &column_names, bool decode, const std::shared_ptr &sampler, const std::shared_ptr &cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), schema_path_(data_schema), column_names_(column_names), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h index 6ef0d159fb..fb50353df2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h @@ -21,13 +21,13 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class AlbumNode : public Dataset { +class AlbumNode : public DatasetNode { public: /// \brief Constructor AlbumNode(const std::string &dataset_dir, const std::string &data_schema, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc index d5e447acec..a9eaa3442d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc @@ -31,7 +31,7 @@ namespace api { CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr &sampler, const bool &decode, const std::set &extensions, const std::shared_ptr &cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h index 3829302cff..0e90a72b2c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h @@ -23,13 +23,13 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class CelebANode : public Dataset { +class CelebANode : public DatasetNode { public: /// \brief Constructor CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr &sampler, diff --git 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc index 835967005d..104c00ee3c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc @@ -31,7 +31,7 @@ namespace api { // Constructor for Cifar100Node Cifar100Node::Cifar100Node(const std::string &dataset_dir, const std::string &usage, std::shared_ptr sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} Status Cifar100Node::ValidateParams() { RETURN_IF_NOT_OK(ValidateDatasetDirParam("Cifar100Node", dataset_dir_)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h index bbde01ba20..79dd35486b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h @@ -21,13 +21,13 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class Cifar100Node : public Dataset { +class Cifar100Node : public DatasetNode { public: /// \brief Constructor Cifar100Node(const std::string &dataset_dir, const std::string &usage, std::shared_ptr sampler, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc index 578e2d9bfb..3d19d8fd79 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc @@ -31,7 +31,7 @@ namespace api { // Constructor for Cifar10Node Cifar10Node::Cifar10Node(const std::string &dataset_dir, const std::string &usage, std::shared_ptr sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} Status Cifar10Node::ValidateParams() { RETURN_IF_NOT_OK(ValidateDatasetDirParam("Cifar10Node", dataset_dir_)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h index ff851c420f..3037caefc0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h @@ -21,13 +21,13 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class Cifar10Node : public Dataset { +class Cifar10Node : public DatasetNode { public: /// \brief Constructor Cifar10Node(const std::string &dataset_dir, const std::string &usage, std::shared_ptr sampler, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc index 538fa63817..17cd4769e4 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc @@ -33,7 +33,7 @@ namespace api { // Constructor for CLUENode CLUENode::CLUENode(const std::vector clue_files, std::string task, std::string usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_files_(clue_files), task_(task), usage_(usage), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h index 8e2eb8aff8..eba34dfab3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h @@ -21,14 +21,14 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { /// \class CLUENode /// \brief A Dataset derived class to represent CLUE dataset -class CLUENode : public Dataset { +class CLUENode : public DatasetNode { public: /// \brief Constructor CLUENode(const std::vector dataset_files, std::string task, std::string usage, int64_t num_samples, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc index 0b447d9960..3e0729f146 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc @@ -30,7 +30,7 @@ namespace api { // Constructor for CocoNode CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, const bool &decode, const std::shared_ptr &sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), annotation_file_(annotation_file), task_(task), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h index 50b621ef01..2593534509 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h @@ -21,12 +21,12 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class CocoNode : public Dataset { +class CocoNode : public DatasetNode { public: /// \brief Constructor CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc index 20f3e3701a..1909b1cfe6 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc @@ -33,7 +33,7 @@ CSVNode::CSVNode(const std::vector &csv_files, char field_delim, const std::vector> &column_defaults, const std::vector &column_names, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, std::shared_ptr cache) - : Dataset(std::move(cache)), + : 
DatasetNode(std::move(cache)), dataset_files_(csv_files), field_delim_(field_delim), column_defaults_(column_defaults), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h index ca673c0ee5..9828d5d03f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h @@ -21,7 +21,7 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { @@ -47,7 +47,7 @@ class CsvRecord : public CsvBase { T value; }; -class CSVNode : public Dataset { +class CSVNode : public DatasetNode { public: /// \brief Constructor CSVNode(const std::vector &dataset_files, char field_delim, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.h index ae52020289..6237346f11 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.h @@ -21,7 +21,7 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" #include "minddata/dataset/util/status.h" namespace mindspore { @@ -31,7 +31,7 @@ namespace api { /// \class GeneratorNode /// \brief A Dataset derived class to represent GeneratorNode dataset -class GeneratorNode : public Dataset { +class GeneratorNode : public DatasetNode { public: /// \brief Constructor GeneratorNode(py::function generator_function, const std::vector &column_names, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc index 338308a0c8..5c4159435d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc @@ -40,7 +40,7 @@ ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shar recursive_(recursive), class_indexing_(class_indexing), exts_(extensions), - Dataset(std::move(cache)) {} + DatasetNode(std::move(cache)) {} Status ImageFolderNode::ValidateParams() { RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderNode", dataset_dir_)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h index fe4e4cd13d..7922eda8bd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h @@ -24,7 +24,7 @@ #include #include "mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache.h" -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { @@ -33,7 +33,7 @@ namespace api { /// \class ImageFolderNode /// \brief A Dataset derived class to represent ImageFolder dataset -class ImageFolderNode : public Dataset { +class ImageFolderNode : public DatasetNode { public: /// \brief Constructor ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr sampler, bool recursive, diff --git 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc index 41141b0ce5..cbb01a9cf2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc @@ -32,7 +32,7 @@ ManifestNode::ManifestNode(const std::string &dataset_file, const std::string &u const std::shared_ptr &sampler, const std::map &class_indexing, bool decode, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_file_(dataset_file), usage_(usage), decode_(decode), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h index 99d7ef435b..b8da1555d5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h @@ -22,12 +22,12 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class ManifestNode : public Dataset { +class ManifestNode : public DatasetNode { public: /// \brief Constructor ManifestNode(const std::string &dataset_file, const std::string &usage, const std::shared_ptr &sampler, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h index 1663f14762..ea10456bcf 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h @@ -22,12 +22,12 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class MindDataNode : public Dataset { +class MindDataNode : public DatasetNode { public: /// \brief Constructor MindDataNode(const std::vector &dataset_files, const std::vector &columns_list, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc index 6d6e1fdee8..5feee17998 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc @@ -30,7 +30,7 @@ namespace api { MnistNode::MnistNode(std::string dataset_dir, std::string usage, std::shared_ptr sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} Status MnistNode::ValidateParams() { RETURN_IF_NOT_OK(ValidateDatasetDirParam("MnistNode", dataset_dir_)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h index 713ba94fdf..663e2ede97 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h @@ -21,13 +21,13 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include 
"minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class MnistNode : public Dataset { +class MnistNode : public DatasetNode { public: /// \brief Constructor MnistNode(std::string dataset_dir, std::string usage, std::shared_ptr sampler, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h index 09e980a14c..79d995438a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h @@ -22,13 +22,13 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class RandomNode : public Dataset { +class RandomNode : public DatasetNode { public: // Some constants to provide limits to random generation. static constexpr int32_t kMaxNumColumns = 4; @@ -38,7 +38,7 @@ class RandomNode : public Dataset { /// \brief Constructor RandomNode(const int32_t &total_rows, std::shared_ptr schema, const std::vector &columns_list, const std::shared_ptr &sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), total_rows_(total_rows), schema_path_(""), schema_(std::move(schema)), @@ -48,7 +48,7 @@ class RandomNode : public Dataset { /// \brief Constructor RandomNode(const int32_t &total_rows, std::string schema_path, const std::vector &columns_list, const std::shared_ptr &sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), total_rows_(total_rows), schema_path_(schema_path), columns_list_(columns_list), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc index 8af0388d4a..2d4841f9e7 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc @@ -31,7 +31,7 @@ namespace api { // Constructor for TextFileNode TextFileNode::TextFileNode(std::vector dataset_files, int32_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_files_(dataset_files), num_samples_(num_samples), shuffle_(shuffle), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h index e5762f8b37..9011aa1603 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h @@ -21,14 +21,14 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { /// \class TextFileNode /// \brief A Dataset derived class to represent TextFile dataset -class TextFileNode : public Dataset { +class TextFileNode : public DatasetNode { public: /// \brief Constructor TextFileNode(std::vector dataset_files, int32_t num_samples, ShuffleMode shuffle, int32_t num_shards, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc index 486c481e19..7492ec1131 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc @@ -55,6 +55,53 @@ bool ValidateFirstRowCrc(const std::string &filename) { // Validator for TFRecordNode Status TFRecordNode::ValidateParams() { + if (dataset_files_.empty()) { + std::string err_msg = "TFRecordNode: dataset_files is not specified."; + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); + } + + for (const auto &f : dataset_files_) { + Path dataset_file(f); + if (!dataset_file.Exists()) { + std::string err_msg = "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."; + MS_LOG(ERROR) << err_msg; + + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); + } + } + + if (num_samples_ < 0) { + std::string err_msg = "TFRecordNode: Invalid number of samples: " + std::to_string(num_samples_); + MS_LOG(ERROR) << err_msg; + + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); + } + + if (num_shards_ <= 0) { + std::string err_msg = "TFRecordNode: Invalid num_shards: " + std::to_string(num_shards_); + MS_LOG(ERROR) << err_msg; + + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); + } + + if (shard_id_ < 0 || shard_id_ >= num_shards_) { + std::string err_msg = "TFRecordNode: Invalid input, shard_id: " + std::to_string(shard_id_) + + ", num_shards: " + std::to_string(num_shards_); + MS_LOG(ERROR) << err_msg; + + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); + } + + if (cache_ == nullptr && !shard_equal_rows_ && dataset_files_.size() < num_shards_) { + // This check only makes sense in a non-cache path. 
We should make sure there is at least one file per + // shard in file-based sharding + std::string err_msg = + "TFRecordNode: Invalid number of dataset files, should at least be " + std::to_string(num_shards_); + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); + } + std::vector invalid_files(dataset_files_.size()); auto it = std::copy_if(dataset_files_.begin(), dataset_files_.end(), invalid_files.begin(), [](const std::string &filename) { return !ValidateFirstRowCrc(filename); }); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h index ebc493d09a..08e4d094c4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h @@ -22,21 +22,21 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { /// \class TFRecordNode /// \brief A Dataset derived class to represent TFRecord dataset -class TFRecordNode : public Dataset { +class TFRecordNode : public DatasetNode { public: /// \brief Constructor /// \note Parameter 'schema' is the path to the schema file TFRecordNode(const std::vector &dataset_files, std::string schema, const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_files_(dataset_files), schema_path_(schema), columns_list_(columns_list), @@ -51,7 +51,7 @@ class TFRecordNode : public Dataset { TFRecordNode(const std::vector &dataset_files, std::shared_ptr schema, const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_files_(dataset_files), schema_obj_(schema), columns_list_(columns_list), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc index f263ceb096..68ade8aa07 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc @@ -32,7 +32,7 @@ namespace api { VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage, const std::map &class_indexing, bool decode, std::shared_ptr sampler, std::shared_ptr cache) - : Dataset(std::move(cache)), + : DatasetNode(std::move(cache)), dataset_dir_(dataset_dir), task_(task), usage_(usage), diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h index a61b758fd6..ed3656397c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h @@ -22,12 +22,12 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class VOCNode : public Dataset { +class VOCNode : public DatasetNode { public: /// \brief 
Constructor VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage, diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc index c4c83903dd..8642cdb79f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc @@ -27,7 +27,7 @@ namespace mindspore { namespace dataset { namespace api { // Constructor for SyncWaitNode -SyncWaitNode::SyncWaitNode(std::shared_ptr child, const std::string &condition_name, int32_t num_batch, +SyncWaitNode::SyncWaitNode(std::shared_ptr child, const std::string &condition_name, int32_t num_batch, py::function callback) : condition_name_(condition_name), num_batch_(num_batch), callback_(callback) { this->children.push_back(child); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.h index 3cd033b49c..b108e257af 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.h @@ -21,7 +21,7 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { @@ -30,10 +30,10 @@ namespace api { /// \class SyncWaitNode /// \brief A Dataset derived class to represent SyncWaitNode dataset -class SyncWaitNode : public Dataset { +class SyncWaitNode : public DatasetNode { public: /// \brief Constructor - explicit SyncWaitNode(std::shared_ptr child, const std::string &condition_name, int32_t num_batch, + explicit SyncWaitNode(std::shared_ptr child, const std::string &condition_name, int32_t num_batch, py::function callback); /// \brief Destructor diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc index b11bd7d345..9a3fed7b87 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc @@ -27,7 +27,7 @@ namespace mindspore { namespace dataset { namespace api { // Constructor for TakeNode -TakeNode::TakeNode(std::shared_ptr child, int32_t count) : take_count_(count) { +TakeNode::TakeNode(std::shared_ptr child, int32_t count) : take_count_(count) { this->children.push_back(child); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h index 6706878be5..dfc7199384 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h @@ -21,17 +21,17 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class TakeNode : public Dataset { +class TakeNode : public DatasetNode { public: /// \brief Constructor - explicit TakeNode(std::shared_ptr child, int32_t count); + explicit TakeNode(std::shared_ptr child, int32_t count); /// \brief Destructor ~TakeNode() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc index 
787b48df99..d566d30d3a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc @@ -28,14 +28,8 @@ namespace dataset { namespace api { // Constructor for TransferNode -TransferNode::TransferNode(std::shared_ptr child, const std::string &queue_name, int32_t device_id, - const std::string &device_type, bool send_epoch_end) - : queue_name_(queue_name), - device_id_(device_id), - device_type_(device_type), - prefetch_size_(16), - send_epoch_end_(send_epoch_end), - total_batch_(0) { +TransferNode::TransferNode(std::shared_ptr child, bool send_epoch_end) + : prefetch_size_(16), send_epoch_end_(send_epoch_end), total_batch_(0) { this->children.push_back(child); } @@ -48,6 +42,15 @@ Status TransferNode::ValidateParams() { // Function to build TransferNode std::vector> TransferNode::Build() { + // Get a uuid for queue name + queue_name_ = Services::GetUniqueID(); + // TODO(CRC): + // Get device type from ms context + device_type_ = "CPU"; + // Get device ID from children + device_id_ = 0; + RETURN_EMPTY_IF_ERROR(TransferNode::get_distribution(shared_from_this(), &device_id_)); + // A vector containing shared pointer to the Dataset Ops that this object will create std::vector> node_ops; @@ -67,13 +70,13 @@ std::vector> TransferNode::Build() { } // Function to get the device_id -Status TransferNode::get_distribution(std::shared_ptr ds, int32_t *device_id) { +Status TransferNode::get_distribution(std::shared_ptr ds, int32_t *device_id) { // Get device id according to the type of dataset Status rc = ds->GetShardId(device_id); if (rc != Status::OK()) { // Get device id from the child node - if (ds->children.size()) { - ds = ds->children[0]; + if (ds->Children().size()) { + ds = ds->Children()[0]; return TransferNode::get_distribution(ds, device_id); } else { std::string err_msg = "Unknown dataset type."; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h index 000287155b..34f00800e5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h @@ -21,18 +21,17 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class TransferNode : public Dataset { +class TransferNode : public DatasetNode { public: /// \brief Constructor - TransferNode(std::shared_ptr child, const std::string &queue_name, int32_t device_id, - const std::string &device_type, bool send_epoch_end); + TransferNode(std::shared_ptr child, bool send_epoch_end); /// \brief Destructor ~TransferNode() = default; @@ -45,7 +44,7 @@ class TransferNode : public Dataset { /// \return Status Status::OK() if all the parameters are valid Status ValidateParams() override; - static Status get_distribution(std::shared_ptr ds, int32_t *device_id); + static Status get_distribution(std::shared_ptr ds, int32_t *device_id); private: std::string queue_name_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc index b34f385530..2099cd1035 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc @@ -27,7 +27,7 @@ namespace mindspore { 
namespace dataset { namespace api { -ZipNode::ZipNode(const std::vector> &datasets) : datasets_(datasets) { +ZipNode::ZipNode(const std::vector> &datasets) : datasets_(datasets) { for (auto dataset : datasets_) { this->children.push_back(dataset); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.h index 0073f66e92..f7046842a9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.h @@ -21,16 +21,16 @@ #include #include -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class ZipNode : public Dataset { +class ZipNode : public DatasetNode { public: /// \brief Constructor - explicit ZipNode(const std::vector> &datasets); + explicit ZipNode(const std::vector> &datasets); /// \brief Destructor ~ZipNode() = default; @@ -44,7 +44,7 @@ class ZipNode : public Dataset { Status ValidateParams() override; private: - std::vector> datasets_; + std::vector> datasets_; }; } // namespace api diff --git a/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc b/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc new file mode 100644 index 0000000000..ae9ab4c275 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc @@ -0,0 +1,27 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "minddata/dataset/engine/python_runtime_context.h" +#include "pybind11/pybind11.h" + +namespace mindspore::dataset { + +Status PythonRuntimeContext::Terminate() { + // Release GIL before joining all threads + py::gil_scoped_release gil_release; + return tree_consumer_->Terminate(); +} +} // namespace mindspore::dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.h b/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.h new file mode 100644 index 0000000000..63353b7efd --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PYTHON_RUNTIME_CONTEXT_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PYTHON_RUNTIME_CONTEXT_H_ + +#include +#include +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/engine/consumers/tree_consumer.h" +#include "minddata/dataset/engine/consumers/python_tree_consumer.h" +#include "minddata/dataset/engine/runtime_context.h" + +namespace mindspore::dataset { +class RuntimeContext; + +/// Class that represents a single runtime instance which can consume data from a data pipeline +class PythonRuntimeContext : public RuntimeContext { + public: + /// Method to terminate the runtime; this will not release the resources + /// \return Status error code + Status Terminate() override; + + ~PythonRuntimeContext() { + Terminate(); + { + py::gil_scoped_acquire gil_acquire; + tree_consumer_.reset(); + } + } + + PythonIteratorConsumer *GetPythonConsumer() { return dynamic_cast(tree_consumer_.get()); } +}; + +} // namespace mindspore::dataset +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PYTHON_RUNTIME_CONTEXT_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc b/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc index e82d6a1bd9..1d15495c7b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc @@ -19,7 +19,7 @@ #include namespace mindspore::dataset { -void RuntimeContext::AssignConsumer(std::unique_ptr tree_consumer) { +void RuntimeContext::AssignConsumer(std::shared_ptr tree_consumer) { tree_consumer_ = std::move(tree_consumer); } } // namespace mindspore::dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/runtime_context.h b/mindspore/ccsrc/minddata/dataset/engine/runtime_context.h index 855acc3fc0..a2f3e17b47 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/runtime_context.h +++ b/mindspore/ccsrc/minddata/dataset/engine/runtime_context.h @@ -40,14 +40,16 @@ class RuntimeContext { /// Set the tree consumer /// \param tree_consumer to be assigned - void AssignConsumer(std::unique_ptr tree_consumer); + void AssignConsumer(std::shared_ptr tree_consumer); /// Get the tree consumer /// \return Raw pointer to the tree consumer. TreeConsumer *GetConsumer() { return tree_consumer_.get(); } - private: - std::unique_ptr tree_consumer_; + ~RuntimeContext() { Terminate(); } + + protected: + std::shared_ptr tree_consumer_; }; } // namespace mindspore::dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc index c891dc1f25..07d01ceec1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace dataset { -Status TreeAdapter::BuildAndPrepare(std::shared_ptr root_ir, int32_t num_epoch) { +Status TreeAdapter::BuildAndPrepare(std::shared_ptr root_ir, int32_t num_epoch) { // Check whether this function has been called before.
If so, return failure CHECK_FAIL_RETURN_UNEXPECTED(tree_ == nullptr, "ExecutionTree is already built."); RETURN_UNEXPECTED_IF_NULL(root_ir); @@ -65,7 +65,7 @@ Status TreeAdapter::GetNext(TensorRow *row) { return Status::OK(); } -Status TreeAdapter::DFSBuildTree(std::shared_ptr ir, std::shared_ptr *op) { +Status TreeAdapter::DFSBuildTree(std::shared_ptr ir, std::shared_ptr *op) { // validate the op can be built first before building the DatasetOp RETURN_IF_NOT_OK(ir->ValidateParams()); std::vector> ops = ir->Build(); @@ -80,7 +80,7 @@ Status TreeAdapter::DFSBuildTree(std::shared_ptr ir, std::shared_p } // Build the children of ir, once they return, add the return value to *op - for (std::shared_ptr child_ir : ir->children) { + for (const auto &child_ir : ir->Children()) { std::shared_ptr child_op; RETURN_IF_NOT_OK(DFSBuildTree(child_ir, &child_op)); RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.h b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.h index d13c47c413..a1f8201cee 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.h +++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.h @@ -24,12 +24,12 @@ #include #include "minddata/dataset/engine/execution_tree.h" -#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" namespace mindspore { namespace dataset { namespace api { -class Dataset; +class DatasetNode; } class TreeAdapter { public: @@ -40,7 +40,7 @@ class TreeAdapter { // This will construct an ExeTree from a Dataset root and Prepare() the ExeTree // This function is only meant to be called once and needs to be called before GetNext // ExeTree will be launched when the first GetNext is called - Status BuildAndPrepare(std::shared_ptr root, int32_t num_epoch = -1); + Status BuildAndPrepare(std::shared_ptr root, int32_t num_epoch = -1); // This is the main method TreeConsumer uses to interact with TreeAdapter // 1. GetNext will Launch() the ExeTree on its first call by iterator (tree is already prepared) @@ -62,7 +62,7 @@ class TreeAdapter { private: // This RECURSIVE function converts IR nodes into DatasetOp in ExecutionTree. IR could build a vector of ops. In // such case, the first node is returned. Op is added as child when the current function returns. 
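The recursion described in the comment above can be condensed into the following sketch. It abbreviates the real body (tree registration and the exact error messages are omitted) but shows how one IR node may expand into several DatasetOps, with the first op handed back to the caller and the children attached under the last one.

// Condensed sketch of the IR-to-ExecutionTree conversion; not the verbatim implementation.
Status TreeAdapter::DFSBuildTree(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *op) {
  RETURN_IF_NOT_OK(ir->ValidateParams());                      // refuse to build an invalid IR node
  std::vector<std::shared_ptr<DatasetOp>> ops = ir->Build();   // one IR node may produce several ops
  CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Build() returned no DatasetOp.");
  *op = ops.front();                                           // the parent links to the first op
  for (size_t i = 1; i < ops.size(); i++) {                    // chain the ops this node produced
    RETURN_IF_NOT_OK(ops[i - 1]->AddChild(ops[i]));
  }
  for (const auto &child_ir : ir->Children()) {                // recurse into the IR children
    std::shared_ptr<DatasetOp> child_op;
    RETURN_IF_NOT_OK(DFSBuildTree(child_ir, &child_op));
    RETURN_IF_NOT_OK(ops.back()->AddChild(child_op));          // children hang off the last op
  }
  return Status::OK();
}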
- Status DFSBuildTree(std::shared_ptr ir, std::shared_ptr *op); + Status DFSBuildTree(std::shared_ptr ir, std::shared_ptr *op); std::unique_ptr cur_db_; std::unordered_map column_name_map_; diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index 2e56ded0de..1cb9426bb0 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -26,10 +26,12 @@ #include #include #include -#include "mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache.h" +#include "minddata/dataset/engine/ir/cache/dataset_cache.h" + #include "minddata/dataset/core/constants.h" #include "minddata/dataset/engine/consumers/tree_consumer.h" #include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" #include "minddata/dataset/include/iterator.h" #include "minddata/dataset/include/samplers.h" #include "minddata/dataset/include/tensor.h" @@ -45,7 +47,6 @@ namespace mindspore { namespace dataset { -// Forward declare class DatasetOp; class DataSchema; class Tensor; @@ -57,6 +58,8 @@ class Vocab; #endif namespace api { +// Forward declare +class DatasetNode; class Dataset; class Iterator; @@ -64,630 +67,109 @@ class TensorOperation; class SchemaObj; class SamplerObj; // Datasets classes (in alphabetical order) -class AlbumNode; -class CelebANode; -class Cifar10Node; -class Cifar100Node; -class CLUENode; -class CocoNode; -class CSVNode; class CsvBase; -class ImageFolderNode; -class BatchNode; -#ifndef ENABLE_ANDROID -class ManifestNode; -class MindDataNode; -#endif -class MnistNode; -class RandomNode; -class TextFileNode; -#ifndef ENABLE_ANDROID -class TFRecordNode; -class VOCNode; -#endif +class BatchDataset; // Dataset Op classes (in alphabetical order) #ifndef ENABLE_ANDROID -class BucketBatchByLengthNode; +class BucketBatchByLengthDataset; #endif -class ConcatNode; -class MapNode; -class ProjectNode; -class RenameNode; -class RepeatNode; -class ShuffleNode; -class SkipNode; -class TakeNode; -class TransferNode; -class ZipNode; - -#define RETURN_EMPTY_IF_ERROR(_s) \ - do { \ - Status __rc = (_s); \ - if (__rc.IsError()) { \ - MS_LOG(ERROR) << __rc; \ - return {}; \ - } \ - } while (false) - -Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, - int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr *shuffle_op); - -// Helper function to validate dataset files parameter -Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files); - -// Helper function to validate dataset num_shards and shard_id parameters -Status ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id); - -// Helper function to validate dataset sampler parameter -Status ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr &sampler); - -Status ValidateStringValue(const std::string &dataset_name, const std::string &str, - const std::unordered_set &valid_strings); - -// Helper function to validate dataset input/output column parameterCD - -Status ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param, - const std::vector &columns); - -// Helper function to validate dataset directory parameter -Status ValidateDatasetDirParam(const std::string &dataset_name, std::string dataset_dir); - -/// \brief Function to create a SchemaObj -/// \param[in] schema_file Path of schema file 
-/// \return Shared pointer to the current schema -std::shared_ptr Schema(const std::string &schema_file = ""); +class ConcatDataset; +class MapDataset; +class ProjectDataset; +class RenameDataset; +class RepeatDataset; +class ShuffleDataset; +class SkipDataset; +class TakeDataset; +class TransferDataset; +class ZipDataset; -/// \brief Function to create an AlbumNode -/// \notes The generated dataset is specified through setting a schema -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] data_schema Path to dataset schema file -/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns. -/// (default = {}) -/// \param[in] decode the option to decode the images in dataset (default = false) -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current Dataset -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names = {}, bool decode = false, - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); +/// \class Dataset datasets.h +/// \brief A base class to represent a dataset in the data pipeline. +class Dataset : public std::enable_shared_from_this { + public: + // need friend class so they can access the children_ field + friend class Iterator; + friend class TransferNode; + friend class mindspore::dataset::TreeAdapter; -/// \brief Function to create a CelebANode -/// \notes The generated dataset has two columns ['image', 'attr']. -/// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. -/// \param[in] dataset_dir Path to the root directory that contains the dataset. -/// \param[in] usage One of "all", "train", "valid" or "test" (default = "all"). -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] decode Decode the images after reading (default=false). -/// \param[in] extensions Set of file extensions to be included in the dataset (default={}). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current Dataset -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = RandomSampler(), bool decode = false, - const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); + /// \brief Constructor + Dataset(); -/// \brief Function to create a Cifar10 Dataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR10, can be "train", "test" or "all" (default = "all"). -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. 
(default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current Dataset -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); + /// \brief Destructor + ~Dataset() = default; -/// \brief Function to create a Cifar100 Dataset -/// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR100, can be "train", "test" or "all" (default = "all"). -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current Dataset -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); + /// \brief Gets the dataset size + /// \return dataset size. If failed, return -1 + int64_t GetDatasetSize(); -/// \brief Function to create a CLUENode -/// \notes The generated dataset has a variable number of columns depending on the task and usage -/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list -/// will be sorted in a lexicographical order. -/// \param[in] task The kind of task, one of "AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC" and "CSL" (default="AFQMC"). -/// \param[in] usage Be used to "train", "test" or "eval" data (default="train"). -/// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) -/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode.kGlobal) -/// Can be any of: -/// ShuffleMode::kFalse - No shuffling is performed. -/// ShuffleMode::kFiles - Shuffle files only. -/// ShuffleMode::kGlobal - Shuffle both the files and samples. -/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) -/// \param[in] shard_id The shard ID within num_shards. This argument should be -/// specified only when num_shards is also specified. (Default = 0) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current CLUENode -std::shared_ptr CLUE(const std::vector &dataset_files, const std::string &task = "AFQMC", - const std::string &usage = "train", int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, - const std::shared_ptr &cache = nullptr); + /// \brief Gets the output type + /// \return a vector of DataType. If failed, return an empty vector + std::vector GetOutputTypes(); -/// \brief Function to create a CocoNode -/// \notes The generated dataset has multi-columns : -/// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], -/// ['iscrowd', dtype=uint32]]. -/// - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd', dtype=uint32]]. 
-/// - task='Keypoint', column: [['image', dtype=uint8], ['keypoints', dtype=float32], -/// ['num_keypoints', dtype=uint32]]. -/// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], -/// ['iscrowd', dtype=uint32], ['area', dtype=uitn32]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] annotation_file Path to the annotation json -/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' -/// \param[in] decode Decode the images after reading -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current Dataset -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task = "Detection", const bool &decode = false, - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); + /// \brief Gets the output shape + /// \return a vector of TensorShape. If failed, return an empty vector + std::vector GetOutputShapes(); -/// \brief Function to create a CSVNode -/// \notes The generated dataset has a variable number of columns -/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list -/// will be sorted in a lexicographical order. -/// \param[in] field_delim A char that indicates the delimiter to separate fields (default=','). -/// \param[in] column_defaults List of default values for the CSV field (default={}). Each item in the list is -/// either a valid type (float, int, or string). If this is not provided, treats all columns as string type. -/// \param[in] column_names List of column names of the dataset (default={}). If this is not provided, infers the -/// column_names from the first row of CSV file. -/// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) -/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal) -/// Can be any of: -/// ShuffleMode::kFalse - No shuffling is performed. -/// ShuffleMode::kFiles - Shuffle files only. -/// ShuffleMode::kGlobal - Shuffle both the files and samples. -/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) -/// \param[in] shard_id The shard ID within num_shards. This argument should be -/// specified only when num_shards is also specified. (Default = 0) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended.
-/// \return Shared pointer to the current Dataset -std::shared_ptr CSV(const std::vector &dataset_files, char field_delim = ',', - const std::vector> &column_defaults = {}, - const std::vector &column_names = {}, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, - const std::shared_ptr &cache = nullptr); + /// \brief Gets the batch size + /// \return int64_t + int64_t GetBatchSize(); -/// \brief Function to create an ImageFolderNode -/// \notes A source dataset that reads images from a tree of directories -/// All images within one folder have the same label -/// The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] decode A flag to decode in ImageFolder -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] extensions File extensions to be read -/// \param[in] class_indexing a class name to label map -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current ImageFolderNode -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode = false, - const std::shared_ptr &sampler = RandomSampler(), - const std::set &extensions = {}, - const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); + /// \brief Gets the repeat count + /// \return int64_t + int64_t GetRepeatCount(); -#ifndef ENABLE_ANDROID -/// \brief Function to create a ManifestNode -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_file The dataset file to be read -/// \param[in] usage Need "train", "eval" or "inference" data (default="train") -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder -/// names will be sorted alphabetically and each class will be given a unique index starting from 0). -/// \param[in] decode Decode the images after reading (default=false). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current ManifestNode -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage = "train", - const std::shared_ptr &sampler = RandomSampler(), - const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &cache = nullptr); -#endif + /// \brief Gets the number of classes + /// \return number of classes. If failed, return -1 + int64_t GetNumClasses(); -#ifndef ENABLE_ANDROID -/// \brief Function to create a MindDataNode -/// \param[in] dataset_file File name of one component of a mindrecord source. Other files with identical source -/// in the same path will be found and loaded automatically. -/// \param[in] columns_list List of columns to be read (default={}) -/// \param[in] sampler Object used to choose samples from the dataset.
If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), -/// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. -/// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. -/// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \return Shared pointer to the current MindDataNode -std::shared_ptr MindData(const std::string &dataset_file, - const std::vector &columns_list = {}, - const std::shared_ptr &sampler = RandomSampler(), - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); + /// \brief Setter function for runtime number of workers + /// \param[in] num_workers The number of threads in this operator + /// \return Shared pointer to the original object + std::shared_ptr SetNumWorkers(int32_t num_workers); -/// \brief Function to create a MindDataNode -/// \param[in] dataset_files List of dataset files to be read directly. -/// \param[in] columns_list List of columns to be read (default={}) -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), -/// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. -/// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. -/// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \return Shared pointer to the current MindDataNode -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list = {}, - const std::shared_ptr &sampler = RandomSampler(), - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); -#endif + /// \brief Function to create an Iterator over the Dataset pipeline + /// \param[in] columns List of columns to be used to specify the order of columns + /// \return Shared pointer to the Iterator + std::shared_ptr CreateIterator(std::vector columns = {}); -/// \brief Function to create a MnistNode -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all"). -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current MnistNode -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); + /// \brief Function to transfer data through a device. + /// \notes If device is Ascend, features of data will be transferred one by one. The limitation + /// of data transmission per time is 256M. + /// \param[in] send_epoch_end Whether to send end of sequence to device or not (default=True). + /// \return Returns true if no error encountered else false. 
+ bool DeviceQueue(bool send_epoch_end = true); -/// \brief Function to create a ConcatNode -/// \notes Reload "+" operator to concat two datasets -/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated -/// \param[in] datasets2 Shared pointer to the second dataset to be concatenated -/// \return Shared pointer to the current ConcatNode -std::shared_ptr operator+(const std::shared_ptr &datasets1, - const std::shared_ptr &datasets2); +#ifndef ENABLE_ANDROID + /// \brief Function to create a Saver to save the dynamic data processed by the dataset pipeline + /// \note Usage restrictions: + /// 1. Supported dataset formats: 'mindrecord' only + /// 2. To save the samples in order, set dataset's shuffle to false and num_files to 1. + /// 3. Before calling the function, do not use batch operator, repeat operator or data augmentation operators + /// with random attribute in map operator. + /// 4. Mindrecord does not support bool, uint64, multi-dimensional uint8(drop dimension) nor + /// multi-dimensional string. + /// \param[in] dataset_path Path to dataset file + /// \param[in] num_files Number of dataset files (default=1) + /// \param[in] dataset_type Dataset format (default="mindrecord") + /// \return Returns true if no error encountered else false + bool Save(std::string dataset_path, int32_t num_files = 1, std::string dataset_type = "mindrecord"); +#endif -/// \brief Function to create a RandomNode -/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random) -/// \param[in] schema SchemaObj to set column type, data type and data shape -/// \param[in] columns_list List of columns to be read (default={}, read all columns) -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended.
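The Save() restrictions listed above are easiest to see in a short usage sketch. This is an illustrative example rather than part of the patch; the dataset directory and output path are placeholders, and it assumes datasets.h is reachable on the include path.

#include <memory>
#include "minddata/dataset/include/datasets.h"

namespace api = mindspore::dataset::api;

bool SaveCifar10AsMindRecord() {
  // Build a plain source pipeline: no Batch/Repeat and no random Map ops,
  // so the rows can be written out directly (restriction 3 above).
  std::shared_ptr<api::Dataset> ds = api::Cifar10("/path/to/cifar-10-batches-bin", "all");
  // One output file in the default "mindrecord" format (restrictions 1 and 2).
  return ds->Save("/path/to/cifar10.mindrecord", 1, "mindrecord");
}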
-/// \return Shared pointer to the current Dataset -template > -std::shared_ptr RandomData(const int32_t &total_rows = 0, const T &schema = nullptr, - const std::vector &columns_list = {}, - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr) { - if (total_rows < 0) { - MS_LOG(ERROR) << "RandomNode: total_rows must be greater than or equal 0, now get " << total_rows; - return nullptr; - } - if (sampler == nullptr) { - MS_LOG(ERROR) << "RandomNode: Sampler is not constructed correctly, sampler: nullptr"; - return nullptr; - } - if (!columns_list.empty()) { - for (uint32_t i = 0; i < columns_list.size(); ++i) { - if (columns_list[i].empty()) { - MS_LOG(ERROR) << "RandomNode:columns_list" - << "[" << i << "] should not be empty"; - return nullptr; - } - } - std::set columns_set(columns_list.begin(), columns_list.end()); - if (columns_set.size() != columns_list.size()) { - MS_LOG(ERROR) << "RandomNode:columns_list: Every column name should not be same with others"; - return nullptr; - } - } - std::shared_ptr ds; - if constexpr (std::is_same::value || std::is_same>::value) { - std::shared_ptr schema_obj = schema; - ds = std::make_shared(total_rows, std::move(schema_obj), std::move(columns_list), std::move(sampler), - cache); - } else { - ds = - std::make_shared(total_rows, std::move(schema), std::move(columns_list), std::move(sampler), cache); + /// \brief Function to create a BatchDataset + /// \notes Combines batch_size number of consecutive rows into batches + /// \param[in] batch_size The number of rows each batch is created with + /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete + /// batch. If true, and if there are less than batch_size rows + /// available to make the last batch, then those rows will + /// be dropped and not propagated to the next node + /// \return Shared pointer to the current BatchDataset + std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false) { + return std::make_shared(shared_from_this(), batch_size, drop_remainder); } - return ds; -} - -/// \brief Function to create a TextFileNode -/// \notes The generated dataset has one column ['text'] -/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list -/// will be sorted in a lexicographical order. -/// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) -/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode.kGlobal) -/// Can be any of: -/// ShuffleMode.kFalse - No shuffling is performed. -/// ShuffleMode.kFiles - Shuffle files only. -/// ShuffleMode.kGlobal - Shuffle both the files and samples. -/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) -/// \param[in] shard_id The shard ID within num_shards. This argument should be -/// specified only when num_shards is also specified. (Default = 0) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. 
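As a rough sketch of how the inline Batch() wrapper above is meant to be used. The dataset path is a placeholder, and the row map plus GetNextRow/Stop calls are assumed from the existing Iterator header pulled in by datasets.h; treat them as assumptions, not as the definitive consumer API.

#include <memory>
#include <string>
#include <unordered_map>
#include "minddata/dataset/include/datasets.h"

namespace api = mindspore::dataset::api;

void IterateInBatches() {
  auto ds = api::Mnist("/path/to/mnist", "train");
  // Combine 32 consecutive rows per batch and drop the last incomplete batch.
  std::shared_ptr<api::BatchDataset> batched = ds->Batch(32, true);

  auto iter = batched->CreateIterator();
  std::unordered_map<std::string, std::shared_ptr<mindspore::dataset::Tensor>> row;
  iter->GetNextRow(&row);
  while (!row.empty()) {
    // row["image"] and row["label"] now hold batched tensors.
    iter->GetNextRow(&row);
  }
  iter->Stop();
}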
-/// \return Shared pointer to the current TextFileNode -std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, - int32_t shard_id = 0, const std::shared_ptr &cache = nullptr); - -#ifndef ENABLE_ANDROID -/// \brief Function to create a TFRecordNode -/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list -/// will be sorted in a lexicographical order. -/// \param[in] schema SchemaObj or string to schema path. (Default = nullptr, which means that the -/// meta data from the TFData file is considered the schema.) -/// \param[in] columns_list List of columns to be read. (Default = {}, read all columns) -/// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) -/// If num_samples is 0 and numRows(parsed from schema) does not exist, read the full dataset; -/// If num_samples is 0 and numRows(parsed from schema) is greater than 0, read numRows rows; -/// If both num_samples and numRows(parsed from schema) are greater than 0, read num_samples rows. -/// \param[in] shuffle The mode for shuffling data every epoch. (Default = ShuffleMode::kGlobal) -/// Can be any of: -/// ShuffleMode::kFalse - No shuffling is performed. -/// ShuffleMode::kFiles - Shuffle files only. -/// ShuffleMode::kGlobal - Shuffle both the files and samples. -/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) -/// \param[in] shard_id The shard ID within num_shards. This argument should be specified only -/// when num_shards is also specified. (Default = 0) -/// \param[in] shard_equal_rows Get equal rows for all shards. (Default = False, number of rows of -/// each shard may be not equal) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current TFRecordNode -template > -std::shared_ptr TFRecord(const std::vector &dataset_files, const T &schema = nullptr, - const std::vector &columns_list = {}, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, - int32_t shard_id = 0, bool shard_equal_rows = false, - const std::shared_ptr &cache = nullptr) { - if (dataset_files.empty()) { - MS_LOG(ERROR) << "TFRecordNode: dataset_files is not specified."; - return nullptr; - } - - for (auto f : dataset_files) { - Path dataset_file(f); - if (!dataset_file.Exists()) { - MS_LOG(ERROR) << "TFRecordNode: dataset file: [" << f << "] is invalid or does not exist."; - return nullptr; - } - } - - if (num_samples < 0) { - MS_LOG(ERROR) << "TFRecordNode: Invalid number of samples: " << num_samples; - return nullptr; - } - - if (num_shards <= 0) { - MS_LOG(ERROR) << "TFRecordNode: Invalid num_shards: " << num_shards; - return nullptr; - } - - if (shard_id < 0 || shard_id >= num_shards) { - MS_LOG(ERROR) << "TFRecordNode: Invalid input, shard_id: " << shard_id << ", num_shards: " << num_shards; - return nullptr; - } - - if (cache == nullptr && !shard_equal_rows && dataset_files.size() < num_shards) { - // This check only makes sense in a non-cache path. 
We should make sure there is at least one file per - // shard in file-based sharding - MS_LOG(ERROR) << "TFRecordNode: Invalid number of dataset files, should at least be " << std::to_string(num_shards); - return nullptr; - } - - std::shared_ptr ds = nullptr; - if constexpr (std::is_same::value || std::is_same>::value) { - std::shared_ptr schema_obj = schema; - ds = std::make_shared(dataset_files, schema_obj, columns_list, num_samples, shuffle, num_shards, - shard_id, shard_equal_rows, cache); - } else { - std::string schema_path = schema; - if (!schema_path.empty()) { - Path schema_file(schema_path); - if (!schema_file.Exists()) { - MS_LOG(ERROR) << "TFRecordNode: schema path [" << schema_path << "] is invalid or does not exist."; - return nullptr; - } - } - ds = std::make_shared(dataset_files, schema_path, columns_list, num_samples, shuffle, num_shards, - shard_id, shard_equal_rows, cache); - } - return ds; -} - -/// \brief Function to create a VOCNode -/// \notes The generated dataset has multi-columns : -/// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], -/// ['difficult', dtype=uint32], ['truncate', dtype=uint32]]. -/// - task='Segmentation', column: [['image', dtype=uint8], ['target',dtype=uint8]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection" -/// \param[in] usage The type of data list text file to be read (default = "train"). -/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task -/// \param[in] decode Decode the images after reading -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// The cache feature is under development and is not recommended. -/// \return Shared pointer to the current Dataset -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", - const std::string &usage = "train", - const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); - -/// \brief Function the create a cache to be attached to a dataset -/// \param id A user assigned session id for the current pipeline -/// \param mem_sz Size of the memory set aside for the row caching. 0 for unlimited -/// \param spill Spill to disk if out of memory -/// \param hostname optional host name -/// \param port optional port -/// \param num_connections optional number of connections -/// \param prefetch_sz optional prefetch size -/// \return Shared pointer to DatasetCache. If error, nullptr is returned. -std::shared_ptr CreateDatasetCache(session_id_type id, uint64_t mem_sz, bool spill, - std::optional hostname = std::nullopt, - std::optional port = std::nullopt, - std::optional num_connections = std::nullopt, - std::optional prefetch_sz = std::nullopt); -#endif - -/// \brief Function to create a sampler for non-mappable dataset (to be used by cache op later). -/// \notes Non-mappable dataset does not directly support a sampler. It has provided sampling arguments (shuffle, -/// num_samples, num_shards, shard_id) and it DOES support sampling if somewhere above it in the pipeline contains -/// a cache. 
If there is no cache above it, then the sampler is not used. -/// \param[in] num_samples The number of samples to be included in the dataset. -/// \param[in] shuffle If true, the indices are shuffled. -/// \param[in] num_shards Number of shards to divide the dataset into. -/// \param[in] shard_id Shard ID of the current shard within num_shards. -/// \return Shared pointer to the current Sampler. -std::shared_ptr SelectSampler(int64_t num_samples, bool shuffle, int32_t num_shards, int32_t shard_id); - -/// \brief Function to create a ZipNode -/// \notes Applies zip to the dataset -/// \param[in] datasets List of shared pointers to the datasets that we want to zip -/// \return Shared pointer to the current Dataset -std::shared_ptr Zip(const std::vector> &datasets); - -/// \class Dataset datasets.h -/// \brief A base class to represent a dataset in the data pipeline. -class Dataset : public std::enable_shared_from_this { - public: - // need friend class so they can access the children_ field - friend class Iterator; - friend class TransferNode; - friend class mindspore::dataset::TreeAdapter; - - /// \brief Constructor - Dataset(); - - /// \brief Constructor that initializes the cache - /// \param dataset_cache DatasetCache - explicit Dataset(const std::shared_ptr &dataset_cache); - - /// \brief Destructor - ~Dataset() = default; - - /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object - /// \return The list of shared pointers to the newly created DatasetOps - virtual std::vector> Build() = 0; - - /// \brief Pure virtual function for derived class to implement parameters validation - /// \return Status Status::OK() if all the parameters are valid - virtual Status ValidateParams() = 0; - - /// \brief Pure virtual function for derived class to get the shard id of specific node - /// \return Status Status::OK() if get shard id successfully - virtual Status GetShardId(int32_t *shard_id) { - return Status(StatusCode::kNotImplementedYet, __LINE__, __FILE__, "Method is not implemented yet."); - } - - /// \brief Gets the dataset size - /// \return dataset size. If failed, return -1 - int64_t GetDatasetSize(); - - /// \brief Gets the output type - /// \return a vector of DataType. If failed, return an empty vector - std::vector GetOutputTypes(); - - /// \brief Gets the output shape - /// \return a vector of TensorShape. If failed, return am empty vector - std::vector GetOutputShapes(); - - /// \brief Gets the batch size - /// \return int64_t - int64_t GetBatchSize(); - - /// \brief Gets the the repeat count - /// \return int64_t - int64_t GetRepeatCount(); - - /// \brief Gets the number of classes - /// \return number of classes. 
If failed, return -1 - int64_t GetNumClasses(); - - /// \brief Setter function for runtime number of workers - /// \param[in] num_workers The number of threads in this operator - /// \return Shared pointer to the original object - std::shared_ptr SetNumWorkers(int32_t num_workers) { -#if !defined(_WIN32) && !defined(_WIN64) -#ifndef ENABLE_ANDROID - int32_t cpu_count = sysconf(_SC_NPROCESSORS_CONF); - if (cpu_count < 0 || cpu_count > INT32_MAX) { - MS_LOG(ERROR) << "Error determining current CPU: " << cpu_count; - return nullptr; - } - if (num_workers < 1 || num_workers > cpu_count) { - MS_LOG(ERROR) << "num_workers exceeds the boundary between 1 and " << cpu_count; - return nullptr; - } -#endif -#endif - num_workers_ = num_workers; - return shared_from_this(); - } - - /// \brief Function to create an Iterator over the Dataset pipeline - /// \param[in] columns List of columns to be used to specify the order of columns - /// \return Shared pointer to the Iterator - std::shared_ptr CreateIterator(std::vector columns = {}); - - /// \brief Function to transfer data through a device. - /// \notes If device is Ascend, features of data will be transferred one by one. The limitation - /// of data transmission per time is 256M. - /// \param[in] send_epoch_end Whether to send end of sequence to device or not (default=True). - /// \return Returns true if no error encountered else false. - bool DeviceQueue(bool send_epoch_end = true); - -#ifndef ENABLE_ANDROID - /// \brief Function to create a Saver to save the dynamic data processed by the dataset pipeline - /// \note Usage restrictions: - /// 1. Supported dataset formats: 'mindrecord' only - /// 2. To save the samples in order, set dataset's shuffle to false and num_files to 1. - /// 3. Before calling the function, do not use batch operator, repeat operator or data augmentation operators - /// with random attribute in map operator. - /// 4. Mindrecord does not support bool, uint64, multi-dimensional uint8(drop dimension) nor - /// multi-dimensional string. - /// \param[in] file_name Path to dataset file - /// \param[in] num_files Number of dataset files (default=1) - /// \param[in] file_type Dataset format (default="mindrecord") - /// \return Returns true if no error encountered else false - bool Save(std::string dataset_path, int32_t num_files = 1, std::string dataset_type = "mindrecord"); -#endif - - /// \brief Function to create a BatchNode - /// \notes Combines batch_size number of consecutive rows into batches - /// \param[in] batch_size The number of rows each batch is created with - /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete - /// batch. If true, and if there are less than batch_size rows - /// available to make the last batch, then those rows will - /// be dropped and not propagated to the next node - /// \return Shared pointer to the current BatchNode - std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); #ifndef ENABLE_ANDROID - /// \brief Function to create a BucketBatchByLengthNode + /// \brief Function to create a BucketBatchByLengthDataset /// \notes Bucket elements according to their lengths. Each bucket will be padded and batched when /// they are full. /// \param[in] column_names Columns passed to element_length_function @@ -712,13 +194,17 @@ class Dataset : public std::enable_shared_from_this { /// an error will occur (default=false). /// \param[in] drop_remainder If true, will drop the last batch for each bucket if it is not a full batch /// (default=false). 
- /// \return Shared pointer to the current BucketBatchByLengthNode - std::shared_ptr BucketBatchByLength( + /// \return Shared pointer to the current BucketBatchByLengthDataset + std::shared_ptr BucketBatchByLength( const std::vector &column_names, const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, std::function element_length_function = nullptr, const std::map>> &pad_info = {}, - bool pad_to_bucket_boundary = false, bool drop_remainder = false); + bool pad_to_bucket_boundary = false, bool drop_remainder = false) { + return std::make_shared(shared_from_this(), column_names, bucket_boundaries, + bucket_batch_sizes, element_length_function, pad_info, + pad_to_bucket_boundary, drop_remainder); + } /// \brief Function to create a SentencePieceVocab from source dataset /// \notes Build a SentencePieceVocab from a dataset. @@ -753,13 +239,17 @@ class Dataset : public std::enable_shared_from_this { bool special_first = true); #endif - /// \brief Function to create a ConcatNode + /// \brief Function to create a ConcatDataset /// \notes Concat the datasets in the input /// \param[in] datasets List of shared pointers to the dataset that should be concatenated together - /// \return Shared pointer to the current ConcatNode - std::shared_ptr Concat(const std::vector> &datasets); + /// \return Shared pointer to the current ConcatDataset + std::shared_ptr Concat(const std::vector> &datasets) { + std::vector> all_datasets = datasets; + all_datasets.push_back(shared_from_this()); + return std::make_shared(all_datasets); + } - /// \brief Function to create a MapNode + /// \brief Function to create a MapDataset /// \notes Applies each operation in operations to this dataset /// \param[in] operations Vector of operations to be applied on the dataset. Operations are /// applied in the order they appear in this list @@ -775,71 +265,81 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] project_columns A list of column names to project /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// The cache feature is under development and is not recommended. 
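A hedged sketch of the two inline wrappers defined above, Concat() and BucketBatchByLength(); the file paths, column name, boundaries, and batch sizes are illustrative only.

#include <memory>
#include <vector>
#include "minddata/dataset/include/datasets.h"

namespace api = mindspore::dataset::api;

void ConcatAndBucket() {
  auto part1 = api::TextFile({"/path/train_part1.txt"});
  auto part2 = api::TextFile({"/path/train_part2.txt"});

  // Concat() appends *this to the supplied list, so the result covers both files.
  std::shared_ptr<api::ConcatDataset> both = part1->Concat({part2});

  // Two boundaries define three buckets, so three batch sizes are supplied
  // (bucket_batch_sizes must have one more entry than bucket_boundaries).
  auto bucketed = both->BucketBatchByLength({"text"}, {4, 8}, {16, 16, 8});
  (void)bucketed;
}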
- /// \return Shared pointer to the current MapNode - std::shared_ptr Map(std::vector> operations, - std::vector input_columns = {}, - std::vector output_columns = {}, - const std::vector &project_columns = {}, - const std::shared_ptr &cache = nullptr); + /// \return Shared pointer to the current MapDataset + std::shared_ptr Map(std::vector> operations, + std::vector input_columns = {}, + std::vector output_columns = {}, + const std::vector &project_columns = {}, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(shared_from_this(), operations, input_columns, output_columns, project_columns, + cache); + } /// \brief Function to create a Project Dataset /// \notes Applies project to the dataset /// \param[in] columns The name of columns to project /// \return Shared pointer to the current Dataset - std::shared_ptr Project(const std::vector &columns); + std::shared_ptr Project(const std::vector &columns) { + return std::make_shared(shared_from_this(), columns); + } /// \brief Function to create a Rename Dataset /// \notes Renames the columns in the input dataset /// \param[in] input_columns List of the input columns to rename /// \param[in] output_columns List of the output columns /// \return Shared pointer to the current Dataset - std::shared_ptr Rename(const std::vector &input_columns, - const std::vector &output_columns); + std::shared_ptr Rename(const std::vector &input_columns, + const std::vector &output_columns) { + return std::make_shared(shared_from_this(), input_columns, output_columns); + } - /// \brief Function to create a RepeatNode + /// \brief Function to create a RepeatDataset /// \notes Repeats this dataset count times. Repeat indefinitely if count is -1 /// \param[in] count Number of times the dataset should be repeated /// \return Shared pointer to the current Dataset - /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatNode` + /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset` /// due to a limitation in the current implementation - std::shared_ptr Repeat(int32_t count = -1); + std::shared_ptr Repeat(int32_t count = -1) { + return std::make_shared(shared_from_this(), count); + } /// \brief Function to create a Shuffle Dataset /// \notes Randomly shuffles the rows of this dataset /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling - /// \return Shared pointer to the current ShuffleNode - std::shared_ptr Shuffle(int32_t buffer_size); + /// \return Shared pointer to the current ShuffleDataset + std::shared_ptr Shuffle(int32_t buffer_size) { + return std::make_shared(shared_from_this(), buffer_size); + } - /// \brief Function to create a SkipNode + /// \brief Function to create a SkipDataset /// \notes Skips count elements in this dataset. /// \param[in] count Number of elements the dataset to be skipped. - /// \return Shared pointer to the current SkipNode - std::shared_ptr Skip(int32_t count); + /// \return Shared pointer to the current SkipDataset + std::shared_ptr Skip(int32_t count) { return std::make_shared(shared_from_this(), count); } - /// \brief Function to create a TakeNode + /// \brief Function to create a TakeDataset /// \notes Takes count elements in this dataset. /// \param[in] count Number of elements the dataset to be taken. 
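The remaining wrappers follow the same pattern and can be chained; a small illustrative pipeline (the directory and column names are placeholders) might look like this:

#include <memory>
#include "minddata/dataset/include/datasets.h"

namespace api = mindspore::dataset::api;

void BuildPipeline() {
  std::shared_ptr<api::Dataset> ds = api::ImageFolder("/path/to/images", true);
  ds = ds->Shuffle(100);                 // ShuffleDataset: 100-row shuffle buffer
  ds = ds->Rename({"image"}, {"data"});  // RenameDataset: "image" becomes "data"
  ds = ds->Project({"data"});            // ProjectDataset: keep only "data"
  ds = ds->Skip(10)->Take(500);          // SkipDataset, then TakeDataset
  ds = ds->Repeat(2);                    // RepeatDataset: two passes over the rows
}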
/// \return Shared pointer to the current Dataset - std::shared_ptr Take(int32_t count = -1); + std::shared_ptr Take(int32_t count = -1) { + return std::make_shared(shared_from_this(), count); + } /// \brief Function to create a Zip Dataset /// \notes Applies zip to the dataset /// \param[in] datasets A list of shared pointers to the datasets that we want to zip /// \return Shared pointer to the current Dataset - std::shared_ptr Zip(const std::vector> &datasets); + std::shared_ptr Zip(const std::vector> &datasets) { + std::vector> all_datasets = datasets; + all_datasets.push_back(shared_from_this()); + return std::make_shared(all_datasets); + } + + std::shared_ptr IRNode() { return ir_node_; } protected: - std::vector> children; - std::shared_ptr parent; std::shared_ptr tree_getters_; - - int32_t num_workers_; - int32_t rows_per_buffer_; - int32_t connector_que_size_; - int32_t worker_connector_size_; - - std::shared_ptr cache_; - Status AddCacheOp(std::vector> *node_ops); + std::shared_ptr ir_node_; }; class SchemaObj { @@ -902,8 +402,569 @@ class SchemaObj { nlohmann::json columns_; }; -/* ####################################### Derived Dataset classes ################################# */ +class BatchDataset : public Dataset { + public: + BatchDataset(std::shared_ptr input, int32_t batch_size, bool drop_remainder = false); +}; + +#ifndef ENABLE_ANDROID +class BucketBatchByLengthDataset : public Dataset { + public: + BucketBatchByLengthDataset( + std::shared_ptr input, const std::vector &column_names, + const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, + std::function element_length_function = nullptr, + const std::map>> &pad_info = {}, + bool pad_to_bucket_boundary = false, bool drop_remainder = false); +}; + +#endif + +class ConcatDataset : public Dataset { + public: + explicit ConcatDataset(const std::vector> &input); +}; + +class MapDataset : public Dataset { + public: + MapDataset(std::shared_ptr input, std::vector> operations, + std::vector input_columns, std::vector output_columns, + const std::vector &project_columns, const std::shared_ptr &cache); +}; + +class ProjectDataset : public Dataset { + public: + ProjectDataset(std::shared_ptr input, const std::vector &columns); +}; + +class RenameDataset : public Dataset { + public: + RenameDataset(std::shared_ptr input, const std::vector &input_columns, + const std::vector &output_columns); +}; + +class RepeatDataset : public Dataset { + public: + RepeatDataset(std::shared_ptr input, int32_t count); +}; + +class ShuffleDataset : public Dataset { + public: + ShuffleDataset(std::shared_ptr input, int32_t buffer_size); +}; + +class SkipDataset : public Dataset { + public: + SkipDataset(std::shared_ptr input, int32_t count); +}; +class TakeDataset : public Dataset { + public: + TakeDataset(std::shared_ptr input, int32_t count); +}; + +class ZipDataset : public Dataset { + public: + explicit ZipDataset(const std::vector> &inputs); +}; + +/// \brief Function to create a SchemaObj +/// \param[in] schema_file Path of schema file +/// \return Shared pointer to the current schema +std::shared_ptr Schema(const std::string &schema_file = ""); + +class AlbumDataset : public Dataset { + public: + AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names = {}, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create an AlbumDataset +/// \notes The generated dataset is specified 
through setting a schema +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] data_schema Path to dataset schema file +/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns. +/// (default = {}) +/// \param[in] decode the option to decode the images in dataset (default = false) +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current Dataset +std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names = {}, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + +class CelebADataset : public Dataset { + public: + explicit CelebADataset(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), bool decode = false, + const std::set &extensions = {}, + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a CelebADataset +/// \notes The generated dataset has two columns ['image', 'attr']. +/// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage One of "all", "train", "valid" or "test" (default = "all"). +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] decode Decode the images after reading (default=false). +/// \param[in] extensions Set of file extensions to be included in the dataset (default={}). +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current Dataset +std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), bool decode = false, + const std::set &extensions = {}, + const std::shared_ptr &cache = nullptr); + +class Cifar10Dataset : public Dataset { + public: + explicit Cifar10Dataset(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a Cifar10 Dataset +/// \notes The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] usage of CIFAR10, can be "train", "test" or "all" (default = "all"). +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. 
+/// \return Shared pointer to the current Dataset +std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + +class Cifar100Dataset : public Dataset { + public: + explicit Cifar100Dataset(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a Cifar100 Dataset +/// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] usage of CIFAR100, can be "train", "test" or "all" (default = "all"). +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current Dataset +std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + +class CLUEDataset : public Dataset { + public: + explicit CLUEDataset(const std::vector &dataset_files, const std::string &task = "AFQMC", + const std::string &usage = "train", int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a CLUEDataset +/// \notes The generated dataset has a variable number of columns depending on the task and usage +/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list +/// will be sorted in a lexicographical order. +/// \param[in] task The kind of task, one of "AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC" and "CSL" (default="AFQMC"). +/// \param[in] usage Be used to "train", "test" or "eval" data (default="train"). +/// \param[in] num_samples The number of samples to be included in the dataset. +/// (Default = 0 means all samples.) +/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode.kGlobal) +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) +/// \param[in] shard_id The shard ID within num_shards. This argument should be +/// specified only when num_shards is also specified. (Default = 0) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. 
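For the leaf-node factories above, a minimal end-to-end sketch using Cifar10 and the Dataset getters; the directory is a placeholder, and the getters report -1 or an empty vector on failure as documented earlier.

#include <iostream>
#include <memory>
#include "minddata/dataset/include/datasets.h"

namespace api = mindspore::dataset::api;

void InspectCifar10() {
  std::shared_ptr<api::Cifar10Dataset> ds = api::Cifar10("/path/to/cifar-10-batches-bin", "train");
  std::cout << "rows: " << ds->GetDatasetSize() << std::endl;             // -1 on failure
  std::cout << "classes: " << ds->GetNumClasses() << std::endl;           // -1 on failure
  std::cout << "columns: " << ds->GetOutputTypes().size() << std::endl;   // 0 on failure
}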
+/// \return Shared pointer to the current CLUEDataset +std::shared_ptr CLUE(const std::vector &dataset_files, const std::string &task = "AFQMC", + const std::string &usage = "train", int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, + int32_t shard_id = 0, const std::shared_ptr &cache = nullptr); + +class CocoDataset : public Dataset { + public: + CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task = "Detection", + const bool &decode = false, const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a CocoDataset +/// \notes The generated dataset has multi-columns : +/// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], +/// ['iscrowd', dtype=uint32]]. +/// - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd', dtype=uint32]]. +/// - task='Keypoint', column: [['image', dtype=uint8], ['keypoints', dtype=float32], +/// ['num_keypoints', dtype=uint32]]. +/// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], +/// ['iscrowd', dtype=uint32], ['area', dtype=uitn32]]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] annotation_file Path to the annotation json +/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' +/// \param[in] decode Decode the images after reading +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current Dataset +std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, + const std::string &task = "Detection", const bool &decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + +class CSVDataset : public Dataset { + public: + explicit CSVDataset(const std::vector &dataset_files, char field_delim = ',', + const std::vector> &column_defaults = {}, + const std::vector &column_names = {}, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a CSVDataset +/// \notes The generated dataset has a variable number of columns +/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list +/// will be sorted in a lexicographical order. +/// \param[in] field_delim A char that indicates the delimiter to separate fields (default=','). +/// \param[in] column_defaults List of default values for the CSV field (default={}). Each item in the list is +/// either a valid type (float, int, or string). If this is not provided, treats all columns as string type. +/// \param[in] column_names List of column names of the dataset (default={}). If this is not provided, infers the +/// column_names from the first row of CSV file. +/// \param[in] num_samples The number of samples to be included in the dataset. +/// (Default = 0 means all samples.) 
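A small sketch of the CSV() factory described here, under the assumption of a headerless placeholder file: column_defaults is left empty so every field is read as a string, and column names are supplied explicitly.

#include <memory>
#include "minddata/dataset/include/datasets.h"

namespace api = mindspore::dataset::api;

void ReadCsv() {
  // Comma-delimited file, no per-column defaults, explicit column names.
  std::shared_ptr<api::CSVDataset> ds =
      api::CSV({"/path/to/train.csv"}, ',', {}, {"col1", "col2", "col3"});
  (void)ds;
}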
+/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal) +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) +/// \param[in] shard_id The shard ID within num_shards. This argument should be +/// specified only when num_shards is also specified. (Default = 0) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current Dataset +std::shared_ptr CSV(const std::vector &dataset_files, char field_delim = ',', + const std::vector> &column_defaults = {}, + const std::vector &column_names = {}, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, + int32_t shard_id = 0, const std::shared_ptr &cache = nullptr); + +class ImageFolderDataset : public Dataset { + public: + explicit ImageFolderDataset(const std::string &dataset_dir, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::set &extensions = {}, + const std::map &class_indexing = {}, + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create an ImageFolderDataset +/// \notes A source dataset that reads images from a tree of directories +/// All images within one folder have the same label +/// The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] decode A flag to decode in ImageFolder +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] extensions File extensions to be read +/// \param[in] class_indexing a class name to label map +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current ImageFolderDataset +std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::set &extensions = {}, + const std::map &class_indexing = {}, + const std::shared_ptr &cache = nullptr); + +#ifndef ENABLE_ANDROID +class ManifestDataset : public Dataset { + public: + explicit ManifestDataset(const std::string &dataset_file, const std::string &usage = "train", + const std::shared_ptr &sampler = RandomSampler(), + const std::map &class_indexing = {}, bool decode = false, + const std::shared_ptr &cache = nullptr); +}; + +/// \brief Function to create a ManifestDataset +/// \notes The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_file The dataset file to be read +/// \param[in] usage Need "train", "eval" or "inference" data (default="train") +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder +/// names will be sorted alphabetically and each class will be given a unique index starting from 0). 
+/// \param[in] decode Decode the images after reading (default=false). +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current ManifestDataset +std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage = "train", + const std::shared_ptr &sampler = RandomSampler(), + const std::map &class_indexing = {}, + bool decode = false, const std::shared_ptr &cache = nullptr); +#endif + +#ifndef ENABLE_ANDROID +class MindDataDataset : public Dataset { + public: + explicit MindDataDataset(const std::string &dataset_file, const std::vector &columns_list = {}, + const std::shared_ptr &sampler = RandomSampler(), + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); + explicit MindDataDataset(const std::vector &dataset_files, + const std::vector &columns_list = {}, + const std::shared_ptr &sampler = RandomSampler(), + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +}; + +/// \brief Function to create a MindDataDataset +/// \param[in] dataset_file File name of one component of a mindrecord source. Other files with identical source +/// in the same path will be found and loaded automatically. +/// \param[in] columns_list List of columns to be read (default={}) +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), +/// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. +/// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. +/// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. +/// \return Shared pointer to the current MindDataDataset +std::shared_ptr MindData(const std::string &dataset_file, + const std::vector &columns_list = {}, + const std::shared_ptr &sampler = RandomSampler(), + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); + +/// \brief Function to create a MindDataDataset +/// \param[in] dataset_files List of dataset files to be read directly. +/// \param[in] columns_list List of columns to be read (default={}) +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), +/// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. +/// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. +/// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. 
+/// \return Shared pointer to the current MindDataDataset +std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &dataset_files, + const std::vector<std::string> &columns_list = {}, + const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +#endif + +class MnistDataset : public Dataset { + public: + explicit MnistDataset(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), + const std::shared_ptr<DatasetCache> &cache = nullptr); +}; + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] usage Usage of MNIST, can be "train", "test" or "all" (default = "all"). +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current MnistDataset +std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), + const std::shared_ptr<DatasetCache> &cache = nullptr); + +/// \brief Function to create a ConcatDataset +/// \notes Overload the "+" operator to concatenate two datasets +/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated +/// \param[in] datasets2 Shared pointer to the second dataset to be concatenated +/// \return Shared pointer to the current ConcatDataset +std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1, + const std::shared_ptr<Dataset> &datasets2); + +class RandomDataDataset : public Dataset { + public: + RandomDataDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema, + const std::vector<std::string> &columns_list, const std::shared_ptr<SamplerObj> &sampler, + std::shared_ptr<DatasetCache> cache); + + RandomDataDataset(const int32_t &total_rows, std::string schema_path, const std::vector<std::string> &columns_list, + const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache); +}; + +/// \brief Function to create a RandomDataset +/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random) +/// \param[in] schema SchemaObj to set column type, data type and data shape +/// \param[in] columns_list List of columns to be read (default={}, read all columns) +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended.
+/// \return Shared pointer to the current Dataset +template <typename T = std::shared_ptr<SchemaObj>> +std::shared_ptr<RandomDataDataset> RandomData(const int32_t &total_rows = 0, const T &schema = nullptr, + const std::vector<std::string> &columns_list = {}, + const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), + const std::shared_ptr<DatasetCache> &cache = nullptr) { + std::shared_ptr<RandomDataDataset> ds; + if constexpr (std::is_same<T, std::nullptr_t>::value || std::is_same<T, std::shared_ptr<SchemaObj>>::value) { + std::shared_ptr<SchemaObj> schema_obj = schema; + ds = std::make_shared<RandomDataDataset>(total_rows, std::move(schema_obj), std::move(columns_list), + std::move(sampler), cache); + } else { + ds = std::make_shared<RandomDataDataset>(total_rows, std::move(schema), std::move(columns_list), std::move(sampler), + cache); + } + return ds; +} + +class TextFileDataset : public Dataset { + public: + explicit TextFileDataset(const std::vector<std::string> &dataset_files, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, + const std::shared_ptr<DatasetCache> &cache = nullptr); +}; + +/// \brief Function to create a TextFileDataset +/// \notes The generated dataset has one column ['text'] +/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list +/// will be sorted in a lexicographical order. +/// \param[in] num_samples The number of samples to be included in the dataset. +/// (Default = 0 means all samples.) +/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal) +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) +/// \param[in] shard_id The shard ID within num_shards. This argument should be +/// specified only when num_shards is also specified. (Default = 0) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current TextFileDataset +std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, + int32_t shard_id = 0, const std::shared_ptr<DatasetCache> &cache = nullptr); + +#ifndef ENABLE_ANDROID +class TFRecordDataset : public Dataset { + public: + TFRecordDataset(const std::vector<std::string> &dataset_files, std::string schema, + const std::vector<std::string> &columns_list, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr<DatasetCache> cache); + + /// \brief Constructor + /// \note Parameter 'schema' is shared pointer to Schema object + TFRecordDataset(const std::vector<std::string> &dataset_files, std::shared_ptr<SchemaObj> schema, + const std::vector<std::string> &columns_list, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr<DatasetCache> cache); +}; + +/// \brief Function to create a TFRecordDataset +/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list +/// will be sorted in a lexicographical order. +/// \param[in] schema SchemaObj or string path to a schema file. (Default = nullptr, which means that the +/// meta data from the TFData file is considered the schema.) +/// \param[in] columns_list List of columns to be read. (Default = {}, read all columns) +/// \param[in] num_samples The number of samples to be included in the dataset. +/// (Default = 0 means all samples.)
+/// If num_samples is 0 and numRows (parsed from schema) does not exist, read the full dataset; +/// If num_samples is 0 and numRows (parsed from schema) is greater than 0, read numRows rows; +/// If both num_samples and numRows (parsed from schema) are greater than 0, read num_samples rows. +/// \param[in] shuffle The mode for shuffling data every epoch. (Default = ShuffleMode::kGlobal) +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) +/// \param[in] shard_id The shard ID within num_shards. This argument should be specified only +/// when num_shards is also specified. (Default = 0) +/// \param[in] shard_equal_rows Get equal rows for all shards. (Default = false, the number of rows in +/// each shard may not be equal) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current TFRecordDataset +template <typename T = std::shared_ptr<SchemaObj>> +std::shared_ptr<TFRecordDataset> TFRecord(const std::vector<std::string> &dataset_files, const T &schema = nullptr, + const std::vector<std::string> &columns_list = {}, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, + int32_t shard_id = 0, bool shard_equal_rows = false, + const std::shared_ptr<DatasetCache> &cache = nullptr) { + std::shared_ptr<TFRecordDataset> ds = nullptr; + if constexpr (std::is_same<T, std::nullptr_t>::value || std::is_same<T, std::shared_ptr<SchemaObj>>::value) { + std::shared_ptr<SchemaObj> schema_obj = schema; + ds = std::make_shared<TFRecordDataset>(dataset_files, schema_obj, columns_list, num_samples, shuffle, num_shards, + shard_id, shard_equal_rows, cache); + } else { + std::string schema_path = schema; + if (!schema_path.empty()) { + Path schema_file(schema_path); + if (!schema_file.Exists()) { + MS_LOG(ERROR) << "TFRecordDataset: schema path [" << schema_path << "] is invalid or does not exist."; + return nullptr; + } + } + ds = std::make_shared<TFRecordDataset>(dataset_files, schema_path, columns_list, num_samples, shuffle, num_shards, + shard_id, shard_equal_rows, cache); + } + return ds; +} + +class VOCDataset : public Dataset { + public: + explicit VOCDataset(const std::string &dataset_dir, const std::string &task = "Segmentation", + const std::string &usage = "train", const std::map<std::string, int32_t> &class_indexing = {}, + bool decode = false, const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), + const std::shared_ptr<DatasetCache> &cache = nullptr); +}; + +/// \brief Function to create a VOCDataset +/// \notes The generated dataset has multiple columns: +/// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], +/// ['difficult', dtype=uint32], ['truncate', dtype=uint32]]. +/// - task='Segmentation', column: [['image', dtype=uint8], ['target', dtype=uint8]]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] task Set the task type of reading VOC data; only "Segmentation" and "Detection" are supported +/// \param[in] usage The type of data list text file to be read (default = "train"). +/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task +/// \param[in] decode Decode the images after reading +/// \param[in] sampler Object used to choose samples from the dataset.
If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// The cache feature is under development and is not recommended. +/// \return Shared pointer to the current Dataset +std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", + const std::string &usage = "train", + const std::map<std::string, int32_t> &class_indexing = {}, bool decode = false, + const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), + const std::shared_ptr<DatasetCache> &cache = nullptr); + +/// \brief Function to create a cache to be attached to a dataset +/// \param id A user assigned session id for the current pipeline +/// \param mem_sz Size of the memory set aside for the row caching. 0 for unlimited +/// \param spill Spill to disk if out of memory +/// \param hostname Optional host name +/// \param port Optional port +/// \param num_connections Optional number of connections +/// \param prefetch_sz Optional prefetch size +/// \return Shared pointer to DatasetCache. If error, nullptr is returned. +std::shared_ptr<DatasetCache> CreateDatasetCache(session_id_type id, uint64_t mem_sz, bool spill, + std::optional<std::string> hostname = std::nullopt, + std::optional<int32_t> port = std::nullopt, + std::optional<int32_t> num_connections = std::nullopt, + std::optional<int32_t> prefetch_sz = std::nullopt); +#endif + +/// \brief Function to create a ZipDataset +/// \notes Applies zip to the input datasets +/// \param[in] datasets List of shared pointers to the datasets that we want to zip +/// \return Shared pointer to the current Dataset +std::shared_ptr<ZipDataset> Zip(const std::vector<std::shared_ptr<Dataset>> &datasets); } // namespace api } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/include/iterator.h b/mindspore/ccsrc/minddata/dataset/include/iterator.h index 4c82fad574..58781ecefb 100644 --- a/mindspore/ccsrc/minddata/dataset/include/iterator.h +++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h @@ -49,7 +49,7 @@ class Iterator { Iterator() : consumer_(nullptr) {} /// \brief Destructor - ~Iterator() = default; + ~Iterator() { Stop(); } /// \brief Method for building and launching the pipeline. /// \param[in] ops - a vector of DatasetOp in the data pipeline.
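For orientation, the following is a minimal usage sketch of how the factory functions declared above compose into a pipeline. It is illustrative only and not part of the patch: the MNIST path is a placeholder, and the Shuffle/Batch/CreateIterator/GetNextRow members it calls come from the wider Dataset/Iterator API rather than from the hunks shown here.

  // Sketch: build a small MNIST pipeline from the leaf-node factory functions above.
  std::shared_ptr<Dataset> ds = Mnist("/path/to/mnist", "all", RandomSampler(false, 10));
  ds = ds->Shuffle(100)->Batch(2);  // chain transform nodes onto the leaf node
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  if (iter != nullptr) {
    std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
    iter->GetNextRow(&row);
    while (!row.empty()) {
      // consume row["image"] and row["label"] here
      iter->GetNextRow(&row);
    }
    // With the iterator.h change above, Stop() now also runs when iter is destroyed.
  }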
diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index e934dcd64d..4bd78ede99 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -82,30 +82,30 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMA if (BUILD_MINDDATA STREQUAL "full") include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") - list(REMOVE_ITEM MINDDATA_API_SRC_FILES - "${MINDDATA_DIR}/api/text.cc" - ) + list(REMOVE_ITEM MINDDATA_API_SRC_FILES + "${MINDDATA_DIR}/api/text.cc" + ) - list(REMOVE_ITEM MINDDATA_CALLBACK_SRC_FILES - "${MINDDATA_DIR}/callback/py_ds_callback.cc" - ) + list(REMOVE_ITEM MINDDATA_CALLBACK_SRC_FILES + "${MINDDATA_DIR}/callback/py_ds_callback.cc" + ) list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES - "${MINDDATA_DIR}/core/cv_tensor.cc" - ) + "${MINDDATA_DIR}/core/cv_tensor.cc" + ) list(REMOVE_ITEM MINDDATA_KERNELS_SRC_FILES "${MINDDATA_DIR}/kernels/py_func_op.cc") list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SRC_FILES - "${MINDDATA_DIR}/engine/datasetops/build_sentence_piece_vocab_op.cc" - "${MINDDATA_DIR}/engine/datasetops/filter_op.cc" - "${MINDDATA_DIR}/engine/datasetops/barrier_op.cc" - "${MINDDATA_DIR}/engine/datasetops/bucket_batch_by_length_op.cc" - "${MINDDATA_DIR}/engine/datasetops/build_vocab_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_merge_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_base_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_lookup_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_op.cc" - ) + "${MINDDATA_DIR}/engine/datasetops/build_sentence_piece_vocab_op.cc" + "${MINDDATA_DIR}/engine/datasetops/filter_op.cc" + "${MINDDATA_DIR}/engine/datasetops/barrier_op.cc" + "${MINDDATA_DIR}/engine/datasetops/bucket_batch_by_length_op.cc" + "${MINDDATA_DIR}/engine/datasetops/build_vocab_op.cc" + "${MINDDATA_DIR}/engine/datasetops/cache_merge_op.cc" + "${MINDDATA_DIR}/engine/datasetops/cache_base_op.cc" + "${MINDDATA_DIR}/engine/datasetops/cache_lookup_op.cc" + "${MINDDATA_DIR}/engine/datasetops/cache_op.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" @@ -161,47 +161,55 @@ if (BUILD_MINDDATA STREQUAL "full") "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" - "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" - "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" - "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" - "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" - "${MINDDATA_DIR}/kernels/image/rescale_op.cc" - "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" - "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" - "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" - "${MINDDATA_DIR}/kernels/image/solarize_op.cc" - "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" - "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" - 
"${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_color_op.cc" - ) + "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" + "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" + "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" + "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" + "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" + "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" + "${MINDDATA_DIR}/kernels/image/rescale_op.cc" + "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" + "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" + "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" + "${MINDDATA_DIR}/kernels/image/solarize_op.cc" + "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" + "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" + "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_color_op.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES - "${MINDDATA_DIR}/engine/ir/datasetops/bucket_batch_by_length_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/build_sentence_piece_vocab_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/build_vocab_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/sync_wait_node.cc" - ) + "${MINDDATA_DIR}/engine/ir/datasetops/bucket_batch_by_length_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/build_sentence_piece_vocab_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/build_vocab_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/sync_wait_node.cc" + ) + list(REMOVE_ITEM MINDDATA_ENGINE_CONSUMERS_SRC_FILES + "${MINDDATA_DIR}/engine/consumers/python_tree_consumer.cc" + ) + + list(REMOVE_ITEM MINDDATA_ENGINE_SRC_FILES + "${MINDDATA_DIR}/engine/python_runtime_context.cc" + ) + list(REMOVE_ITEM MINDDATA_KERNELS_DATA_SRC_FILES - "${MINDDATA_DIR}/kernels/data/unique_op.cc" - ) + "${MINDDATA_DIR}/kernels/data/unique_op.cc" + ) include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) - set(MINDDATA_EXAMPLE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/example/jni-example.cc) - endif() + set(MINDDATA_EXAMPLE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/example/jni-example.cc) + endif () add_library(minddata-lite SHARED - ${MINDDATA_API_SRC_FILES} + ${MINDDATA_API_SRC_FILES} ${MINDDATA_CALLBACK_SRC_FILES} ${MINDDATA_CORE_SRC_FILES} ${MINDDATA_ENGINE_SRC_FILES} diff --git a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc index ddd147cfed..6b06259e85 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc @@ -1093,7 +1093,7 @@ TEST_F(MindDataTestPipeline, TestTakeDatasetDefault) { std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 7)); EXPECT_NE(ds, nullptr); - // Create a Take operation on ds, dafault count = -1 + // Create a Take operation on ds, default count = -1 ds = ds->Take(); EXPECT_NE(ds, nullptr); diff --git 
a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc index 232fdbe25d..090c4f7352 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc @@ -429,7 +429,7 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetWithNullSampler) { schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); std::shared_ptr ds = RandomData(50, schema, {}, nullptr); // Expect failure: sampler can not be nullptr - EXPECT_EQ(ds, nullptr); + EXPECT_EQ(ds->CreateIterator(), nullptr); } TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) { @@ -441,5 +441,5 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) { schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); std::shared_ptr ds = RandomData(50, schema, {"image", "image"}); // Expect failure: duplicate column names - EXPECT_EQ(ds, nullptr); + EXPECT_EQ(ds->CreateIterator(), nullptr); } diff --git a/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc b/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc index bed08f6ada..62eb1efd8c 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc @@ -443,34 +443,34 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetExeception) { // This case expected to fail because the list of dir_path cannot be empty. std::shared_ptr ds1 = TFRecord({}); - EXPECT_EQ(ds1, nullptr); + EXPECT_EQ(ds1->CreateIterator(), nullptr); // This case expected to fail because the file in dir_path is not exist. std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; std::shared_ptr ds2 = TFRecord({file_path, "noexist.data"}); - EXPECT_EQ(ds2, nullptr); + EXPECT_EQ(ds2->CreateIterator(), nullptr); // This case expected to fail because the file of schema is not exist. std::shared_ptr ds4 = TFRecord({file_path, "notexist.json"}); - EXPECT_EQ(ds4, nullptr); + EXPECT_EQ(ds4->CreateIterator(), nullptr); // This case expected to fail because num_samples is negative. std::shared_ptr ds5 = TFRecord({file_path}, "", {}, -1); - EXPECT_EQ(ds5, nullptr); + EXPECT_EQ(ds5->CreateIterator(), nullptr); // This case expected to fail because num_shards is negative. std::shared_ptr ds6 = TFRecord({file_path}, "", {}, 10, ShuffleMode::kFalse, 0); - EXPECT_EQ(ds6, nullptr); + EXPECT_EQ(ds6->CreateIterator(), nullptr); // This case expected to fail because shard_id is out_of_bound. std::shared_ptr ds7 = TFRecord({file_path}, "", {}, 10, ShuffleMode::kFalse, 3, 3); - EXPECT_EQ(ds7, nullptr); + EXPECT_EQ(ds7->CreateIterator(), nullptr); // This case expected to fail because the provided number of files < num_shards in file-based sharding. 
std::string file_path1 = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data"; std::string file_path2 = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0002.data"; std::shared_ptr ds8 = TFRecord({file_path1, file_path2}, "", {}, 0, ShuffleMode::kFalse, 3); - EXPECT_EQ(ds8, nullptr); + EXPECT_EQ(ds8->CreateIterator(), nullptr); } TEST_F(MindDataTestPipeline, TestTFRecordDatasetExeception2) { diff --git a/tests/ut/cpp/dataset/tree_adapter_test.cc b/tests/ut/cpp/dataset/tree_adapter_test.cc index f29dec7654..c0f336232b 100644 --- a/tests/ut/cpp/dataset/tree_adapter_test.cc +++ b/tests/ut/cpp/dataset/tree_adapter_test.cc @@ -56,7 +56,7 @@ TEST_F(MindDataTestTreeAdapter, TestSimpleTreeAdapter) { mindspore::dataset::TreeAdapter tree_adapter; - Status rc = tree_adapter.BuildAndPrepare(ds, 1); + Status rc = tree_adapter.BuildAndPrepare(ds->IRNode(), 1); EXPECT_TRUE(rc.IsOk()); @@ -91,7 +91,7 @@ TEST_F(MindDataTestTreeAdapter, TestTreeAdapterWithRepeat) { mindspore::dataset::TreeAdapter tree_adapter; - Status rc = tree_adapter.BuildAndPrepare(ds, 2); + Status rc = tree_adapter.BuildAndPrepare(ds->IRNode(), 2); EXPECT_TRUE(rc.IsOk()); const std::unordered_map map = tree_adapter.GetColumnNameMap(); @@ -128,7 +128,7 @@ TEST_F(MindDataTestTreeAdapter, TestProjectMapTreeAdapter) { mindspore::dataset::TreeAdapter tree_adapter; - Status rc = tree_adapter.BuildAndPrepare(ds, 2); + Status rc = tree_adapter.BuildAndPrepare(ds->IRNode(), 2); EXPECT_TRUE(rc.IsOk());
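A brief note on why the test expectations above change: with this patch the factory functions always return a Dataset wrapper around an IR node, so parameter validation is deferred until the IR tree is built. Invalid arguments therefore no longer yield a null dataset pointer; the failure surfaces when the pipeline is constructed, which is why the tests now probe CreateIterator() (or BuildAndPrepare(ds->IRNode(), ...)). A minimal sketch of that pattern, reusing the gtest macros and the deliberately bad file name from the tests above:

  // Sketch only: construction succeeds even for an invalid input file ...
  std::shared_ptr<Dataset> ds = TFRecord({"noexist.data"});
  EXPECT_NE(ds, nullptr);
  // ... and the failure is reported when the pipeline is built.
  EXPECT_EQ(ds->CreateIterator(), nullptr);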