Merge pull request !4422 from xiefangqi/xfq_c++api_randomdatatags/v0.7.0-beta
| @@ -28,6 +28,7 @@ | |||
| #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/manifest_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/random_data_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/text_file_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/voc_op.h" | |||
| // Dataset operator headers (in alphabetical order) | |||
| @@ -102,6 +103,15 @@ Dataset::Dataset() { | |||
| worker_connector_size_ = cfg->worker_connector_size(); | |||
| } | |||
| /// \brief Function to create a SchemaObj | |||
| /// \param[in] schema_file Path of schema file | |||
| /// \return Shared pointer to the current schema | |||
| std::shared_ptr<SchemaObj> Schema(const std::string &schema_file) { | |||
| auto schema = std::make_shared<SchemaObj>(schema_file); | |||
| return schema->init() ? schema : nullptr; | |||
| } | |||
| // FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS | |||
| // (In alphabetical order) | |||
| @@ -366,6 +376,163 @@ std::shared_ptr<ZipDataset> Dataset::Zip(const std::vector<std::shared_ptr<Datas | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| } | |||
| SchemaObj::SchemaObj(const std::string &schema_file) : schema_file_(schema_file), num_rows_(0), dataset_type_("") {} | |||
| // SchemaObj init function | |||
| bool SchemaObj::init() { | |||
| if (schema_file_ != "") { | |||
| Path schema_file(schema_file_); | |||
| if (!schema_file.Exists()) { | |||
| MS_LOG(ERROR) << "The file " << schema_file << " does not exist or permission denied!"; | |||
| return false; | |||
| } | |||
| nlohmann::json js; | |||
| try { | |||
| std::ifstream in(schema_file_); | |||
| in >> js; | |||
| } catch (const std::exception &err) { | |||
| MS_LOG(ERROR) << "Schema file failed to load"; | |||
| return false; | |||
| } | |||
| return from_json(js); | |||
| } | |||
| return true; | |||
| } | |||
| // Function to add a column to schema with a mstype de_type | |||
| bool SchemaObj::add_column(std::string name, TypeId de_type, std::vector<int32_t> shape) { | |||
| nlohmann::json new_column; | |||
| new_column["name"] = name; | |||
| // if de_type is mstype | |||
| DataType data_type = dataset::MSTypeToDEType(de_type); | |||
| new_column["type"] = data_type.ToString(); | |||
| if (shape.size() > 0) { | |||
| new_column["shape"] = shape; | |||
| new_column["rank"] = shape.size(); | |||
| } else { | |||
| new_column["rank"] = 1; | |||
| } | |||
| columns_.push_back(new_column); | |||
| return true; | |||
| } | |||
| // Function to add a column to schema with a string de_type | |||
| bool SchemaObj::add_column(std::string name, std::string de_type, std::vector<int32_t> shape) { | |||
| nlohmann::json new_column; | |||
| new_column["name"] = name; | |||
| DataType data_type(de_type); | |||
| new_column["type"] = data_type.ToString(); | |||
| if (shape.size() > 0) { | |||
| new_column["shape"] = shape; | |||
| new_column["rank"] = shape.size(); | |||
| } else { | |||
| new_column["rank"] = 1; | |||
| } | |||
| columns_.push_back(new_column); | |||
| return true; | |||
| } | |||
| std::string SchemaObj::to_json() { | |||
| nlohmann::json json_file; | |||
| json_file["columns"] = columns_; | |||
| if (dataset_type_ != "") { | |||
| json_file["datasetType"] = dataset_type_; | |||
| } | |||
| if (num_rows_ > 0) { | |||
| json_file["numRows"] = num_rows_; | |||
| } | |||
| return json_file.dump(2); | |||
| } | |||
| bool SchemaObj::parse_column(nlohmann::json columns) { | |||
| std::string name, de_type; | |||
| std::vector<int32_t> shape; | |||
| columns_.clear(); | |||
| if (columns.type() == nlohmann::json::value_t::array) { | |||
| // reference to python list | |||
| for (auto column : columns) { | |||
| auto key_name = column.find("name"); | |||
| if (key_name == column.end()) { | |||
| MS_LOG(ERROR) << "Column's name is missing"; | |||
| return false; | |||
| } | |||
| name = *key_name; | |||
| auto key_type = column.find("type"); | |||
| if (key_type == column.end()) { | |||
| MS_LOG(ERROR) << "Column's type is missing"; | |||
| return false; | |||
| } | |||
| de_type = *key_type; | |||
| shape.clear(); | |||
| auto key_shape = column.find("shape"); | |||
| if (key_shape != column.end()) { | |||
| shape.insert(shape.end(), (*key_shape).begin(), (*key_shape).end()); | |||
| } | |||
| if (!add_column(name, de_type, shape)) { | |||
| return false; | |||
| } | |||
| } | |||
| } else if (columns.type() == nlohmann::json::value_t::object) { | |||
| for (const auto &it_child : columns.items()) { | |||
| name = it_child.key(); | |||
| auto key_type = it_child.value().find("type"); | |||
| if (key_type == it_child.value().end()) { | |||
| MS_LOG(ERROR) << "Column's type is missing"; | |||
| return false; | |||
| } | |||
| de_type = *key_type; | |||
| shape.clear(); | |||
| auto key_shape = it_child.value().find("shape"); | |||
| if (key_shape != it_child.value().end()) { | |||
| shape.insert(shape.end(), (*key_shape).begin(), (*key_shape).end()); | |||
| } | |||
| if (!add_column(name, de_type, shape)) { | |||
| return false; | |||
| } | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "columns must be dict or list, columns contain name, type, shape(optional)."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool SchemaObj::from_json(nlohmann::json json_obj) { | |||
| for (const auto &it_child : json_obj.items()) { | |||
| if (it_child.key() == "datasetType") { | |||
| dataset_type_ = it_child.value(); | |||
| } else if (it_child.key() == "numRows") { | |||
| num_rows_ = it_child.value(); | |||
| } else if (it_child.key() == "columns") { | |||
| if (!parse_column(it_child.value())) { | |||
| MS_LOG(ERROR) << "parse columns failed"; | |||
| return false; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Unknown field " << it_child.key(); | |||
| return false; | |||
| } | |||
| } | |||
| if (columns_.empty()) { | |||
| MS_LOG(ERROR) << "Columns are missing."; | |||
| return false; | |||
| } | |||
| if (num_rows_ <= 0) { | |||
| MS_LOG(ERROR) << "numRows must be greater than 0"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| // OTHER FUNCTIONS | |||
| // Helper function to create default RandomSampler. | |||
| @@ -960,6 +1127,67 @@ std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() { | |||
| return node_ops; | |||
| } | |||
| // ValideParams for RandomDataset | |||
| bool RandomDataset::ValidateParams() { | |||
| if (total_rows_ < 0) { | |||
| MS_LOG(ERROR) << "RandomDataset: total_rows must be greater than 0, now get " << total_rows_; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| int32_t RandomDataset::GenRandomInt(int32_t min, int32_t max) { | |||
| std::uniform_int_distribution<int32_t> uniDist(min, max); | |||
| return uniDist(rand_gen_); | |||
| } | |||
| // Build for RandomDataset | |||
| std::vector<std::shared_ptr<DatasetOp>> RandomDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| rand_gen_.seed(GetSeed()); // seed the random generator | |||
| // If total rows was not given, then randomly pick a number | |||
| std::shared_ptr<SchemaObj> schema_obj; | |||
| if (!schema_path_.empty()) schema_obj = std::make_shared<SchemaObj>(schema_path_); | |||
| if (schema_obj != nullptr && total_rows_ == 0) { | |||
| total_rows_ = schema_obj->get_num_rows(); | |||
| } | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| std::string schema_json_string, schema_file_path; | |||
| if (schema_ != nullptr) { | |||
| schema_->set_dataset_type("Random"); | |||
| if (total_rows_ != 0) { | |||
| schema_->set_num_rows(total_rows_); | |||
| } | |||
| schema_json_string = schema_->to_json(); | |||
| } else { | |||
| schema_file_path = schema_path_; | |||
| } | |||
| std::unique_ptr<DataSchema> data_schema; | |||
| std::vector<std::string> columns_to_load; | |||
| if (!schema_file_path.empty() || !schema_json_string.empty()) { | |||
| data_schema = std::make_unique<DataSchema>(); | |||
| if (!schema_file_path.empty()) { | |||
| data_schema->LoadSchemaFile(schema_file_path, columns_to_load); | |||
| } else if (!schema_json_string.empty()) { | |||
| data_schema->LoadSchemaString(schema_json_string, columns_to_load); | |||
| } | |||
| } | |||
| std::shared_ptr<RandomDataOp> op; | |||
| op = std::make_shared<RandomDataOp>(num_workers_, connector_que_size_, rows_per_buffer_, total_rows_, | |||
| std::move(data_schema), std::move(sampler_->Build())); | |||
| node_ops.push_back(op); | |||
| return node_ops; | |||
| } | |||
| // Constructor for TextFileDataset | |||
| TextFileDataset::TextFileDataset(std::vector<std::string> dataset_files, int32_t num_samples, ShuffleMode shuffle, | |||
| int32_t num_shards, int32_t shard_id) | |||
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "minddata/dataset/include/de_tensor.h" | |||
| #include "minddata/dataset/include/type_id.h" | |||
| #include "minddata/dataset/core/constants.h" | |||
| #include "minddata/dataset/core/data_type.h" | |||
| #include "mindspore/core/ir/dtype/type_id.h" | |||
| @@ -23,68 +24,6 @@ | |||
| namespace mindspore { | |||
| namespace tensor { | |||
// Maps a mindspore TypeId onto the equivalent dataset-engine DataType.
// Any TypeId with no dataset counterpart maps to DE_UNKNOWN.
dataset::DataType MSTypeToDEType(TypeId data_type) {
  switch (data_type) {
    case kNumberTypeBool:
      return dataset::DataType(dataset::DataType::DE_BOOL);
    case kNumberTypeInt8:
      return dataset::DataType(dataset::DataType::DE_INT8);
    case kNumberTypeUInt8:
      return dataset::DataType(dataset::DataType::DE_UINT8);
    case kNumberTypeInt16:
      return dataset::DataType(dataset::DataType::DE_INT16);
    case kNumberTypeUInt16:
      return dataset::DataType(dataset::DataType::DE_UINT16);
    case kNumberTypeInt32:
      return dataset::DataType(dataset::DataType::DE_INT32);
    case kNumberTypeUInt32:
      return dataset::DataType(dataset::DataType::DE_UINT32);
    case kNumberTypeInt64:
      return dataset::DataType(dataset::DataType::DE_INT64);
    case kNumberTypeUInt64:
      return dataset::DataType(dataset::DataType::DE_UINT64);
    case kNumberTypeFloat16:
      return dataset::DataType(dataset::DataType::DE_FLOAT16);
    case kNumberTypeFloat32:
      return dataset::DataType(dataset::DataType::DE_FLOAT32);
    case kNumberTypeFloat64:
      return dataset::DataType(dataset::DataType::DE_FLOAT64);
    default:
      return dataset::DataType(dataset::DataType::DE_UNKNOWN);
  }
}
// Maps a dataset-engine DataType back onto the equivalent mindspore TypeId.
// Types with no mindspore counterpart (e.g. strings) map to kTypeUnknown.
TypeId DETypeToMSType(dataset::DataType data_type) {
  switch (data_type.value()) {
    case dataset::DataType::DE_BOOL:
      return mindspore::TypeId::kNumberTypeBool;
    case dataset::DataType::DE_INT8:
      return mindspore::TypeId::kNumberTypeInt8;
    case dataset::DataType::DE_UINT8:
      return mindspore::TypeId::kNumberTypeUInt8;
    case dataset::DataType::DE_INT16:
      return mindspore::TypeId::kNumberTypeInt16;
    case dataset::DataType::DE_UINT16:
      return mindspore::TypeId::kNumberTypeUInt16;
    case dataset::DataType::DE_INT32:
      return mindspore::TypeId::kNumberTypeInt32;
    case dataset::DataType::DE_UINT32:
      return mindspore::TypeId::kNumberTypeUInt32;
    case dataset::DataType::DE_INT64:
      return mindspore::TypeId::kNumberTypeInt64;
    case dataset::DataType::DE_UINT64:
      return mindspore::TypeId::kNumberTypeUInt64;
    case dataset::DataType::DE_FLOAT16:
      return mindspore::TypeId::kNumberTypeFloat16;
    case dataset::DataType::DE_FLOAT32:
      return mindspore::TypeId::kNumberTypeFloat32;
    case dataset::DataType::DE_FLOAT64:
      return mindspore::TypeId::kNumberTypeFloat64;
    default:
      return kTypeUnknown;
  }
}
// Factory for an empty DETensor of the given element type and shape.
// NOTE(review): returns a raw owning pointer allocated with `new`; the caller
// is responsible for deleting it — confirm this matches the MSTensor API contract.
MSTensor *DETensor::CreateTensor(TypeId data_type, const std::vector<int> &shape) {
  return new DETensor(data_type, shape);
}
| @@ -113,7 +52,7 @@ DETensor::DETensor(TypeId data_type, const std::vector<int> &shape) { | |||
| t_shape.reserve(shape.size()); | |||
| std::transform(shape.begin(), shape.end(), std::back_inserter(t_shape), | |||
| [](int s) -> dataset::dsize_t { return static_cast<dataset::dsize_t>(s); }); | |||
| dataset::Tensor::CreateEmpty(dataset::TensorShape(t_shape), MSTypeToDEType(data_type), &this->tensor_impl_); | |||
| dataset::Tensor::CreateEmpty(dataset::TensorShape(t_shape), dataset::MSTypeToDEType(data_type), &this->tensor_impl_); | |||
| } | |||
| DETensor::DETensor(std::shared_ptr<dataset::Tensor> tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); } | |||
| @@ -133,14 +72,14 @@ std::shared_ptr<dataset::Tensor> DETensor::tensor() const { | |||
| TypeId DETensor::data_type() const { | |||
| MS_ASSERT(this->tensor_impl_ != nullptr); | |||
| return DETypeToMSType(this->tensor_impl_->type()); | |||
| return dataset::DETypeToMSType(this->tensor_impl_->type()); | |||
| } | |||
| TypeId DETensor::set_data_type(TypeId data_type) { | |||
| MS_ASSERT(this->tensor_impl_ != nullptr); | |||
| if (data_type != this->data_type()) { | |||
| std::shared_ptr<dataset::Tensor> temp; | |||
| dataset::Tensor::CreateFromMemory(this->tensor_impl_->shape(), MSTypeToDEType(data_type), | |||
| dataset::Tensor::CreateFromMemory(this->tensor_impl_->shape(), dataset::MSTypeToDEType(data_type), | |||
| this->tensor_impl_->GetBuffer(), &temp); | |||
| this->tensor_impl_ = temp; | |||
| } | |||
| @@ -50,13 +50,6 @@ Status RandomDataOp::Builder::Build(std::shared_ptr<RandomDataOp> *out_op) { | |||
| std::make_shared<RandomDataOp>(builder_num_workers_, builder_op_connector_size_, builder_rows_per_buffer_, | |||
| builder_total_rows_, std::move(builder_data_schema_), std::move(builder_sampler_)); | |||
| // If the user did not provide a schema, then we will ask the op to generate a pseudo-random | |||
| // schema. | |||
| // See details of generateSchema function to learn what type of schema it will create. | |||
| if ((*out_op)->data_schema_ == nullptr) { | |||
| RETURN_IF_NOT_OK((*out_op)->GenerateSchema()); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -85,6 +78,12 @@ RandomDataOp::RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64 | |||
| if (total_rows_ == 0) { | |||
| total_rows_ = GenRandomInt(1, kMaxTotalRows); | |||
| } | |||
| // If the user did not provide a schema, then we will ask the op to generate a pseudo-random | |||
| // schema. | |||
| // See details of generateSchema function to learn what type of schema it will create. | |||
| if (data_schema_ == nullptr) { | |||
| GenerateSchema(); | |||
| } | |||
| // Everyone is already out from the sync area. | |||
| all_out_.Set(); | |||
| } | |||
| @@ -106,11 +105,7 @@ void RandomDataOp::Print(std::ostream &out, bool show_all) const { | |||
| } | |||
| // Helper function to produce a default/random schema if one didn't exist | |||
| Status RandomDataOp::GenerateSchema() { | |||
| if (data_schema_ != nullptr) { | |||
| return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Generating a schema but one already exists!"); | |||
| } | |||
| void RandomDataOp::GenerateSchema() { | |||
| // To randomly create a schema, we need to choose: | |||
| // a) how many columns | |||
| // b) the type of each column | |||
| @@ -144,8 +139,6 @@ Status RandomDataOp::GenerateSchema() { | |||
| data_schema_->AddColumn(*newCol); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| // Class functor operator () override. | |||
| @@ -213,9 +213,8 @@ class RandomDataOp : public ParallelOp { | |||
| /** | |||
| * Helper function to produce a default/random schema if one didn't exist | |||
| @return Status - The error code return | |||
| */ | |||
| Status GenerateSchema(); | |||
| */ | |||
| void GenerateSchema(); | |||
| /** | |||
| * Performs a synchronization between workers at the end of an epoch | |||
| @@ -25,9 +25,11 @@ | |||
| #include <utility> | |||
| #include <string> | |||
| #include "minddata/dataset/core/constants.h" | |||
| #include "minddata/dataset/engine/data_schema.h" | |||
| #include "minddata/dataset/include/tensor.h" | |||
| #include "minddata/dataset/include/iterator.h" | |||
| #include "minddata/dataset/include/samplers.h" | |||
| #include "minddata/dataset/include/type_id.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -41,6 +43,7 @@ class TensorShape; | |||
| namespace api { | |||
| class TensorOperation; | |||
| class SchemaObj; | |||
| class SamplerObj; | |||
| // Datasets classes (in alphabetical order) | |||
| class CelebADataset; | |||
| @@ -51,6 +54,7 @@ class CocoDataset; | |||
| class ImageFolderDataset; | |||
| class ManifestDataset; | |||
| class MnistDataset; | |||
| class RandomDataset; | |||
| class TextFileDataset; | |||
| class VOCDataset; | |||
| // Dataset Op classes (in alphabetical order) | |||
| @@ -65,6 +69,11 @@ class SkipDataset; | |||
| class TakeDataset; | |||
| class ZipDataset; | |||
| /// \brief Function to create a SchemaObj | |||
| /// \param[in] schema_file Path of schema file | |||
| /// \return Shared pointer to the current schema | |||
| std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = ""); | |||
| /// \brief Function to create a CelebADataset | |||
| /// \notes The generated dataset has two columns ['image', 'attr']. | |||
| // The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. | |||
| @@ -187,6 +196,21 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, | |||
| std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1, | |||
| const std::shared_ptr<Dataset> &datasets2); | |||
| /// \brief Function to create a RandomDataset | |||
| /// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random) | |||
| /// \param[in] schema SchemaObj to set column type, data type and data shape | |||
| /// \param[in] columns_list List of columns to be read (default=None, read all columns) | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \return Shared pointer to the current Dataset | |||
| template <typename T = std::shared_ptr<SchemaObj>> | |||
| std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schema = nullptr, | |||
| std::vector<std::string> columns_list = {}, | |||
| std::shared_ptr<SamplerObj> sampler = nullptr) { | |||
| auto ds = std::make_shared<RandomDataset>(total_rows, schema, std::move(columns_list), std::move(sampler)); | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| } | |||
| /// \brief Function to create a TextFileDataset | |||
| /// \notes The generated dataset has one column ['text'] | |||
| /// \param[in] dataset_files List of files to be read to search for a pattern of files. The list | |||
| @@ -355,6 +379,66 @@ class Dataset : public std::enable_shared_from_this<Dataset> { | |||
| int32_t worker_connector_size_; | |||
| }; | |||
/// \class SchemaObj
/// \brief In-memory representation of a dataset schema: an ordered set of columns
///   (name/type/shape) plus optional datasetType and numRows metadata. Can be
///   built programmatically via add_column() or loaded from a JSON file via init().
class SchemaObj {
 public:
  /// \brief Constructor
  /// \param[in] schema_file Optional path of a JSON schema file; "" means an empty schema.
  explicit SchemaObj(const std::string &schema_file = "");

  /// \brief Destructor
  ~SchemaObj() = default;

  /// \brief SchemaObj init function; loads and parses schema_file if one was given.
  /// \return bool true if schema init success
  bool init();

  /// \brief Add new column to the schema
  /// \param[in] name name of the column.
  /// \param[in] de_type data type of the column(TypeId).
  /// \param[in] shape shape of the column (empty shape records the column as rank 1).
  /// \return bool true if the column was added successfully
  bool add_column(std::string name, TypeId de_type, std::vector<int32_t> shape);

  /// \brief Add new column to the schema
  /// \param[in] name name of the column.
  /// \param[in] de_type data type of the column(std::string).
  /// \param[in] shape shape of the column (empty shape records the column as rank 1).
  /// \return bool true if the column was added successfully
  bool add_column(std::string name, std::string de_type, std::vector<int32_t> shape);

  /// \brief Get a JSON string of the schema (2-space indented)
  /// \return JSON string of the schema
  std::string to_json();

  /// \brief Get a JSON string of the schema (alias of to_json)
  std::string to_string() { return to_json(); }

  /// \brief set a new value to dataset_type
  inline void set_dataset_type(std::string dataset_type) { dataset_type_ = dataset_type; }

  /// \brief set a new value to num_rows
  inline void set_num_rows(int32_t num_rows) { num_rows_ = num_rows; }

  /// \brief get the current num_rows
  inline int32_t get_num_rows() { return num_rows_; }

 private:
  /// \brief Parse the columns and add it to columns
  /// \param[in] columns dataset attribution information, decoded from schema file.
  ///    support both nlohmann::json::value_t::array and nlohmann::json::value_t::object.
  /// \return bool true if all columns were parsed and added successfully
  bool parse_column(nlohmann::json columns);

  /// \brief Populate this schema from a parsed JSON object
  /// \param[in] json_obj object of json parsed.
  /// \return bool true if the json object was parsed successfully
  bool from_json(nlohmann::json json_obj);

  int32_t num_rows_;           // "numRows" field; 0 until set/loaded
  std::string dataset_type_;   // "datasetType" field; "" until set
  std::string schema_file_;    // source file path; "" for in-memory schemas
  nlohmann::json columns_;     // array of per-column json objects
};
| /* ####################################### Derived Dataset classes ################################# */ | |||
| // DERIVED DATASET CLASSES FOR LEAF-NODE DATASETS | |||
| @@ -562,6 +646,53 @@ class MnistDataset : public Dataset { | |||
| std::shared_ptr<SamplerObj> sampler_; | |||
| }; | |||
/// \class RandomDataset
/// \brief A Dataset derived class that generates random data, optionally shaped
///   by a SchemaObj or a schema file. total_rows == 0 means a random row count.
class RandomDataset : public Dataset {
 public:
  // Some constants to provide limits to random generation.
  static constexpr int32_t kMaxNumColumns = 4;
  static constexpr int32_t kMaxRank = 4;
  static constexpr int32_t kMaxDimValue = 32;

  /// \brief Constructor taking an in-memory schema object (may be nullptr for a random schema)
  RandomDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema, std::vector<std::string> columns_list,
                std::shared_ptr<SamplerObj> sampler)
      : total_rows_(total_rows),
        schema_path_(""),
        schema_(std::move(schema)),
        columns_list_(columns_list),
        sampler_(std::move(sampler)) {}

  /// \brief Constructor taking a schema file path (schema_ stays null in this case)
  RandomDataset(const int32_t &total_rows, std::string schema_path, std::vector<std::string> columns_list,
                std::shared_ptr<SamplerObj> sampler)
      : total_rows_(total_rows), schema_path_(schema_path), columns_list_(columns_list), sampler_(std::move(sampler)) {}

  /// \brief Destructor
  ~RandomDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return The list of shared pointers to the newly created DatasetOps
  std::vector<std::shared_ptr<DatasetOp>> Build() override;

  /// \brief Parameters validation (total_rows must be non-negative)
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  /// \brief A quick inline for producing a random number between (and including) min/max
  /// \param[in] min minimum number that can be generated.
  /// \param[in] max maximum number that can be generated.
  /// \return The generated random number
  int32_t GenRandomInt(int32_t min, int32_t max);

  int32_t total_rows_;                      // 0 = let the op choose a random row count
  std::string schema_path_;                 // "" unless the file-path constructor was used
  std::shared_ptr<SchemaObj> schema_;       // null unless the SchemaObj constructor was used
  // NOTE(review): columns_list_ is stored but not consumed by Build(); confirm it is intended.
  std::vector<std::string> columns_list_;
  std::shared_ptr<SamplerObj> sampler_;
  std::mt19937 rand_gen_;                   // seeded from GetSeed() in Build()
};
| /// \class TextFileDataset | |||
| /// \brief A Dataset derived class to represent TextFile dataset | |||
| class TextFileDataset : public Dataset { | |||
| @@ -0,0 +1,88 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_ | |||
| #include "minddata/dataset/core/data_type.h" | |||
| #include "mindspore/core/ir/dtype/type_id.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| inline dataset::DataType MSTypeToDEType(TypeId data_type) { | |||
| switch (data_type) { | |||
| case kNumberTypeBool: | |||
| return dataset::DataType(dataset::DataType::DE_BOOL); | |||
| case kNumberTypeInt8: | |||
| return dataset::DataType(dataset::DataType::DE_INT8); | |||
| case kNumberTypeUInt8: | |||
| return dataset::DataType(dataset::DataType::DE_UINT8); | |||
| case kNumberTypeInt16: | |||
| return dataset::DataType(dataset::DataType::DE_INT16); | |||
| case kNumberTypeUInt16: | |||
| return dataset::DataType(dataset::DataType::DE_UINT16); | |||
| case kNumberTypeInt32: | |||
| return dataset::DataType(dataset::DataType::DE_INT32); | |||
| case kNumberTypeUInt32: | |||
| return dataset::DataType(dataset::DataType::DE_UINT32); | |||
| case kNumberTypeInt64: | |||
| return dataset::DataType(dataset::DataType::DE_INT64); | |||
| case kNumberTypeUInt64: | |||
| return dataset::DataType(dataset::DataType::DE_UINT64); | |||
| case kNumberTypeFloat16: | |||
| return dataset::DataType(dataset::DataType::DE_FLOAT16); | |||
| case kNumberTypeFloat32: | |||
| return dataset::DataType(dataset::DataType::DE_FLOAT32); | |||
| case kNumberTypeFloat64: | |||
| return dataset::DataType(dataset::DataType::DE_FLOAT64); | |||
| default: | |||
| return dataset::DataType(dataset::DataType::DE_UNKNOWN); | |||
| } | |||
| } | |||
| inline TypeId DETypeToMSType(dataset::DataType data_type) { | |||
| switch (data_type.value()) { | |||
| case dataset::DataType::DE_BOOL: | |||
| return mindspore::TypeId::kNumberTypeBool; | |||
| case dataset::DataType::DE_INT8: | |||
| return mindspore::TypeId::kNumberTypeInt8; | |||
| case dataset::DataType::DE_UINT8: | |||
| return mindspore::TypeId::kNumberTypeUInt8; | |||
| case dataset::DataType::DE_INT16: | |||
| return mindspore::TypeId::kNumberTypeInt16; | |||
| case dataset::DataType::DE_UINT16: | |||
| return mindspore::TypeId::kNumberTypeUInt16; | |||
| case dataset::DataType::DE_INT32: | |||
| return mindspore::TypeId::kNumberTypeInt32; | |||
| case dataset::DataType::DE_UINT32: | |||
| return mindspore::TypeId::kNumberTypeUInt32; | |||
| case dataset::DataType::DE_INT64: | |||
| return mindspore::TypeId::kNumberTypeInt64; | |||
| case dataset::DataType::DE_UINT64: | |||
| return mindspore::TypeId::kNumberTypeUInt64; | |||
| case dataset::DataType::DE_FLOAT16: | |||
| return mindspore::TypeId::kNumberTypeFloat16; | |||
| case dataset::DataType::DE_FLOAT32: | |||
| return mindspore::TypeId::kNumberTypeFloat32; | |||
| case dataset::DataType::DE_FLOAT64: | |||
| return mindspore::TypeId::kNumberTypeFloat64; | |||
| default: | |||
| return kTypeUnknown; | |||
| } | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_ | |||
| @@ -104,6 +104,7 @@ SET(DE_UT_SRCS | |||
| c_api_dataset_clue_test.cc | |||
| c_api_dataset_coco_test.cc | |||
| c_api_dataset_filetext_test.cc | |||
| c_api_dataset_randomdata_test.cc | |||
| c_api_dataset_voc_test.cc | |||
| c_api_datasets_test.cc | |||
| c_api_dataset_iterator_test.cc | |||
| @@ -0,0 +1,271 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/common.h" | |||
| #include "minddata/dataset/include/datasets.h" | |||
| #include "minddata/dataset/core/config_manager.h" | |||
| #include "minddata/dataset/core/global_context.h" | |||
| #include "mindspore/core/ir/dtype/type_id.h" | |||
| using namespace mindspore::dataset; | |||
| using namespace mindspore::dataset::api; | |||
| using mindspore::dataset::Tensor; | |||
| using mindspore::dataset::TensorShape; | |||
| using mindspore::dataset::DataType; | |||
// Shared gtest fixture for the C++ API pipeline tests; inherits dataset-op
// test scaffolding (e.g. datasets_root_path_) from UT::DatasetOpTesting.
class MindDataTestPipeline : public UT::DatasetOpTesting {
 protected:
};
// Pipeline under test: RandomData(50 rows, explicit 2-column schema) -> 4 workers -> Repeat(4).
TEST_F(MindDataTestPipeline, TestRandomDatasetBasic1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetBasic1.";

  // Create a RandomDataset with a fixed schema of two uint8 columns.
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
  schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
  std::shared_ptr<Dataset> ds = RandomData(50, schema);
  EXPECT_NE(ds, nullptr);

  ds = ds->SetNumWorkers(4);
  EXPECT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  ds = ds->Repeat(4);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // Check if RandomDataOp read correct columns
  uint64_t i = 0;
  while (row.size() != 0) {
    auto image = row["image"];
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    MS_LOG(INFO) << "Tensor label shape: " << label->shape();
    iter->GetNextRow(&row);
    i++;
  }

  // 50 rows x 4 repeats = 200 rows total.
  EXPECT_EQ(i, 200);

  // Manually terminate the pipeline
  iter->Stop();
}
// Pipeline under test: RandomData(10 rows, NO schema -> pseudo-random schema) -> 1 worker -> Repeat(2).
TEST_F(MindDataTestPipeline, TestRandomDatasetBasic2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetBasic2.";

  // Create a RandomDataset
  std::shared_ptr<Dataset> ds = RandomData(10);
  EXPECT_NE(ds, nullptr);

  ds = ds->SetNumWorkers(1);
  EXPECT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  ds = ds->Repeat(2);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // Check if RandomDataOp read correct columns
  // NOTE(review): no schema was supplied, so column names are randomly generated;
  // row["image"]/row["label"] likely default-insert null tensors here — confirm
  // these lookups (and the shape() calls below) are intended for this case.
  uint64_t i = 0;
  while (row.size() != 0) {
    auto image = row["image"];
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    MS_LOG(INFO) << "Tensor label shape: " << label->shape();
    iter->GetNextRow(&row);
    i++;
  }

  // 10 rows x 2 repeats = 20 rows total.
  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
| TEST_F(MindDataTestPipeline, TestRandomDatasetBasic3) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetBasic3."; | |||
| // Create a RandomDataset | |||
| u_int32_t curr_seed = GlobalContext::config_manager()->seed(); | |||
| GlobalContext::config_manager()->set_seed(246); | |||
| std::string SCHEMA_FILE = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json"; | |||
| std::shared_ptr<SchemaObj> schema = Schema(SCHEMA_FILE); | |||
| std::shared_ptr<Dataset> ds = RandomData(0, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Repeat operation on ds | |||
| ds = ds->Repeat(2); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| // Check if RandomDataOp read correct columns | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| auto col_sint16 = row["col_sint16"]; | |||
| auto col_sint32 = row["col_sint32"]; | |||
| auto col_sint64 = row["col_sint64"]; | |||
| auto col_float = row["col_float"]; | |||
| auto col_1d = row["col_1d"]; | |||
| auto col_2d = row["col_2d"]; | |||
| auto col_3d = row["col_3d"]; | |||
| auto col_binary = row["col_binary"]; | |||
| // validate shape | |||
| ASSERT_EQ(col_sint16->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_sint32->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_sint64->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_float->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_1d->shape(), TensorShape({2})); | |||
| ASSERT_EQ(col_2d->shape(), TensorShape({2, 2})); | |||
| ASSERT_EQ(col_3d->shape(), TensorShape({2, 2, 2})); | |||
| ASSERT_EQ(col_binary->shape(), TensorShape({1})); | |||
| // validate Rank | |||
| ASSERT_EQ(col_sint16->Rank(), 1); | |||
| ASSERT_EQ(col_sint32->Rank(), 1); | |||
| ASSERT_EQ(col_sint64->Rank(), 1); | |||
| ASSERT_EQ(col_float->Rank(), 1); | |||
| ASSERT_EQ(col_1d->Rank(), 1); | |||
| ASSERT_EQ(col_2d->Rank(), 2); | |||
| ASSERT_EQ(col_3d->Rank(), 3); | |||
| ASSERT_EQ(col_binary->Rank(), 1); | |||
| // validate type | |||
| ASSERT_EQ(col_sint16->type(), DataType::DE_INT16); | |||
| ASSERT_EQ(col_sint32->type(), DataType::DE_INT32); | |||
| ASSERT_EQ(col_sint64->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_float->type(), DataType::DE_FLOAT32); | |||
| ASSERT_EQ(col_1d->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_2d->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_3d->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_binary->type(), DataType::DE_UINT8); | |||
| iter->GetNextRow(&row); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 984); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| GlobalContext::config_manager()->set_seed(curr_seed); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestRandomDatasetBasic4) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetBasic3."; | |||
| // Create a RandomDataset | |||
| u_int32_t curr_seed = GlobalContext::config_manager()->seed(); | |||
| GlobalContext::config_manager()->set_seed(246); | |||
| std::string SCHEMA_FILE = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json"; | |||
| std::shared_ptr<Dataset> ds = RandomData(0, SCHEMA_FILE); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Repeat operation on ds | |||
| ds = ds->Repeat(2); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| // Check if RandomDataOp read correct columns | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| auto col_sint16 = row["col_sint16"]; | |||
| auto col_sint32 = row["col_sint32"]; | |||
| auto col_sint64 = row["col_sint64"]; | |||
| auto col_float = row["col_float"]; | |||
| auto col_1d = row["col_1d"]; | |||
| auto col_2d = row["col_2d"]; | |||
| auto col_3d = row["col_3d"]; | |||
| auto col_binary = row["col_binary"]; | |||
| // validate shape | |||
| ASSERT_EQ(col_sint16->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_sint32->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_sint64->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_float->shape(), TensorShape({1})); | |||
| ASSERT_EQ(col_1d->shape(), TensorShape({2})); | |||
| ASSERT_EQ(col_2d->shape(), TensorShape({2, 2})); | |||
| ASSERT_EQ(col_3d->shape(), TensorShape({2, 2, 2})); | |||
| ASSERT_EQ(col_binary->shape(), TensorShape({1})); | |||
| // validate Rank | |||
| ASSERT_EQ(col_sint16->Rank(), 1); | |||
| ASSERT_EQ(col_sint32->Rank(), 1); | |||
| ASSERT_EQ(col_sint64->Rank(), 1); | |||
| ASSERT_EQ(col_float->Rank(), 1); | |||
| ASSERT_EQ(col_1d->Rank(), 1); | |||
| ASSERT_EQ(col_2d->Rank(), 2); | |||
| ASSERT_EQ(col_3d->Rank(), 3); | |||
| ASSERT_EQ(col_binary->Rank(), 1); | |||
| // validate type | |||
| ASSERT_EQ(col_sint16->type(), DataType::DE_INT16); | |||
| ASSERT_EQ(col_sint32->type(), DataType::DE_INT32); | |||
| ASSERT_EQ(col_sint64->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_float->type(), DataType::DE_FLOAT32); | |||
| ASSERT_EQ(col_1d->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_2d->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_3d->type(), DataType::DE_INT64); | |||
| ASSERT_EQ(col_binary->type(), DataType::DE_UINT8); | |||
| iter->GetNextRow(&row); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 984); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| GlobalContext::config_manager()->set_seed(curr_seed); | |||
| } | |||