!10799 make user-facing headers standalone for minddata

From: @mhmotallebi Reviewed-by: Signed-off-by:
5 years ago · a477a97278
--- a/mindspore/ccsrc/minddata/dataset/api/config.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/config.cc
@@ -17,6 +17,7 @@
 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/include/config.h"
 #include "minddata/dataset/util/log_adapter.h"
 #include "minddata/dataset/util/status.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@@ -19,16 +19,33 @@
 #include <fstream>
 #include <unordered_set>
 #include <utility>

 #include "minddata/dataset/engine/runtime_context.h"
 #include "minddata/dataset/include/samplers.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/util/path.h"
 #include "minddata/dataset/util/status.h"

 #include "minddata/dataset/core/client.h"
 #include "minddata/dataset/engine/consumers/tree_consumer.h"

 #include "minddata/dataset/kernels/c_func_op.h"
 #include "minddata/dataset/kernels/tensor_op.h"

 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/engine/ir/cache/dataset_cache_impl.h"
 #endif

 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/text/sentence_piece_vocab.h"
 #include "minddata/dataset/text/vocab.h"
 #endif

 // Sampler headers (in alphabetical order)
 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"

 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"

 // IR non-leaf nodes
 #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
 #ifndef ENABLE_ANDROID
@@ -57,7 +74,6 @@
 #endif

 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/util/path.h"
 #include "minddata/dataset/util/random.h"
 #include "minddata/dataset/util/services.h"

@@ -939,6 +955,7 @@ TFRecordDataset::TFRecordDataset(const std::vector<std::string> &dataset_files,
                                           shard_id, shard_equal_rows, cache);
  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }

 #endif
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc
@@ -16,11 +16,15 @@
 #include "minddata/dataset/include/iterator.h"
 #include "minddata/dataset/core/client.h"
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
 #include "minddata/dataset/engine/runtime_context.h"
 #include "minddata/dataset/include/datasets.h"

 namespace mindspore {
 namespace dataset {

 // Default constructor: the iterator starts with consumer_ set to nullptr.
 Iterator::Iterator() : consumer_(nullptr) {}
 // Destructor: calls Stop() before the iterator is destroyed.
 Iterator::~Iterator() { Stop(); }

 // Get the next row from the data pipeline.
 bool Iterator::GetNextRow(TensorMap *row) {
  Status rc = consumer_->GetNextAsMap(row);
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/datasets_bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/datasets_bindings.cc
@@ -23,6 +23,8 @@
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/text/sentence_piece_vocab.h"

 // IR non-leaf nodes
 #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
 #include "minddata/dataset/engine/ir/datasetops/concat_node.h"
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/bindings.cc
@@ -21,6 +21,7 @@
 #include "minddata/dataset/api/python/pybind_register.h"
 #include "minddata/dataset/text/vocab.h"
 #include "minddata/dataset/text/sentence_piece_vocab.h"
 #include "minddata/dataset/include/constants.h"

 namespace mindspore {
 namespace dataset {
--- a/mindspore/ccsrc/minddata/dataset/api/text.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/text.cc
@@ -39,6 +39,7 @@
 #include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h"
 #include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h"
 #endif
 #include "minddata/dataset/core/data_type.h"
 #include "minddata/dataset/util/path.h"

 namespace mindspore {
@@ -87,7 +88,7 @@ std::shared_ptr<JiebaTokenizerOperation> JiebaTokenizer(const std::string &hmm_p
 }

 std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
                                        const DataType &data_type) {
                                        const std::string &data_type) {
  auto op = std::make_shared<LookupOperation>(vocab, unknown_token, data_type);

  return op->ValidateParams() ? op : nullptr;
@@ -142,7 +143,7 @@ std::shared_ptr<SlidingWindowOperation> SlidingWindow(const int32_t width, const
  return op->ValidateParams() ? op : nullptr;
 }

 std::shared_ptr<ToNumberOperation> ToNumber(const DataType data_type) {
 std::shared_ptr<ToNumberOperation> ToNumber(const std::string &data_type) {
  auto op = std::make_shared<ToNumberOperation>(data_type);

  return op->ValidateParams() ? op : nullptr;
@@ -200,6 +201,19 @@ Status ValidateTokenizerDirParam(const std::string &tokenizer_name, const std::s
  return Status::OK();
 }

 // Helper functions that validate the data type string passed by the user
 // Reports whether data_type exactly matches one of the numeric type names listed below.
 // The match is a plain string comparison, so it is case-sensitive; "bool" and "string" are not in the list.
 bool IsTypeNumeric(const std::string &data_type) {
  static const char *const kNumericTypeNames[] = {"int8",  "uint8",  "int16",   "uint16",  "int32",  "uint32",
                                                  "int64", "uint64", "float16", "float32", "float64"};
  for (const char *candidate : kNumericTypeNames) {
    if (data_type == candidate) {
      return true;
    }
  }
  return false;
 }

 // Reports whether data_type is exactly the string "bool" (case-sensitive).
 bool IsTypeBoolean(const std::string &data_type) {
  const bool is_boolean = (data_type.compare("bool") == 0);
  return is_boolean;
 }

 // Reports whether data_type is exactly the string "string" (case-sensitive).
 bool IsTypeString(const std::string &data_type) {
  const bool is_string = (data_type.compare("string") == 0);
  return is_string;
 }

 /* ####################################### Derived TensorOperation classes ################################# */

 // (In alphabetical order)
@@ -239,6 +253,8 @@ BertTokenizerOperation::BertTokenizerOperation(const std::shared_ptr<Vocab> &voc
      preserve_unused_token_(preserve_unused_token),
      with_offsets_(with_offsets) {}

 // Destructor is defaulted here in the .cc file; the class body in text.h only declares it.
 // NOTE(review): presumably this keeps text.h independent of the full member type definitions — confirm.
 BertTokenizerOperation::~BertTokenizerOperation() = default;

 Status BertTokenizerOperation::ValidateParams() {
  if (vocab_ == nullptr) {
    std::string err_msg = "BertTokenizer: vocab object type is incorrect or null.";
@@ -303,9 +319,11 @@ std::shared_ptr<TensorOp> JiebaTokenizerOperation::Build() {

 // LookupOperation
 LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
                                 const DataType &data_type)
                                 const std::string &data_type)
    : vocab_(vocab), unknown_token_(unknown_token), default_id_(Vocab::kNoTokenExists), data_type_(data_type) {}

 // Destructor is defaulted here in the .cc file; the class body in text.h only declares it.
 // NOTE(review): presumably this keeps text.h independent of the full Vocab definition — confirm.
 LookupOperation::~LookupOperation() = default;

 Status LookupOperation::ValidateParams() {
  if (vocab_ == nullptr) {
    std::string err_msg = "Lookup: vocab object type is incorrect or null.";
@@ -320,7 +338,7 @@ Status LookupOperation::ValidateParams() {
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }

  if (!data_type_.IsNumeric()) {
  if (!IsTypeNumeric(data_type_)) {
    std::string err_msg = "Lookup does not support a string to string mapping, data_type can only be numeric.";
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
@@ -330,7 +348,7 @@ Status LookupOperation::ValidateParams() {
 }

 std::shared_ptr<TensorOp> LookupOperation::Build() {
  std::shared_ptr<LookupOp> tensor_op = std::make_shared<LookupOp>(vocab_, default_id_, data_type_);
  std::shared_ptr<LookupOp> tensor_op = std::make_shared<LookupOp>(vocab_, default_id_, DataType(data_type_));
  return tensor_op;
 }

@@ -419,6 +437,8 @@ std::shared_ptr<TensorOp> RegexTokenizerOperation::Build() {
 #endif

 // SentencePieceTokenizerOperation
 // Destructor is defaulted here in the .cc file; the class body in text.h only declares it.
 // NOTE(review): presumably this keeps text.h independent of the full SentencePieceVocab definition — confirm.
 SentencePieceTokenizerOperation::~SentencePieceTokenizerOperation() = default;

 // Constructs the operation from an already-built SentencePieceVocab object.
 // \param[in] vocab Shared pointer to the vocab; stored in vocab_.
 // \param[in] out_type Output type selector; stored in out_type_.
 // vocab_path_ is left as an empty string and load_type_ is set to SPieceTokenizerLoadType::kModel.
 SentencePieceTokenizerOperation::SentencePieceTokenizerOperation(const std::shared_ptr<SentencePieceVocab> &vocab,
                                                                  SPieceTokenizerOutType out_type)
     : vocab_(vocab), vocab_path_(std::string()), load_type_(SPieceTokenizerLoadType::kModel), out_type_(out_type) {}
@@ -482,11 +502,11 @@ std::shared_ptr<TensorOp> SlidingWindowOperation::Build() {
 }

 // ToNumberOperation
 ToNumberOperation::ToNumberOperation(DataType data_type) : data_type_(data_type) {}
 ToNumberOperation::ToNumberOperation(std::string data_type) : data_type_(data_type) {}

 Status ToNumberOperation::ValidateParams() {
  if (!data_type_.IsNumeric() || data_type_.IsBool()) {
    std::string err_msg = "ToNumber : The parameter data_type must be a numeric type, got: " + data_type_.ToString();
  if (!IsTypeNumeric(data_type_) || IsTypeBoolean(data_type_)) {
    std::string err_msg = "ToNumber : The parameter data_type must be a numeric type, got: " + data_type_;
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }
--- a/mindspore/ccsrc/minddata/dataset/core/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/core/constants.h
@@ -59,6 +59,9 @@ enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
 // Possible values for SPieceTokenizerLoadType
 enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };

 // Possible values for SentencePieceModel.
 // Each enumerator is given an explicit numeric value.
 enum class SentencePieceModel {
  kUnigram = 0,
  kBpe = 1,
  kChar = 2,
  kWord = 3
 };

 // Possible values for NormalizeForm
 enum class NormalizeForm {
  kNone = 0,
--- a/mindspore/ccsrc/minddata/dataset/core/global_context.h
+++ b/mindspore/ccsrc/minddata/dataset/core/global_context.h
@@ -19,6 +19,7 @@
 #include <memory>
 #include <mutex>

 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/util/allocator.h"
 #include "minddata/dataset/util/status.h"
@@ -27,7 +28,6 @@ namespace mindspore {
 namespace dataset {
 // forward declare
 class MemoryPool;
 class ConfigManager;
 class Tensor;
 class CVTensor;

--- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
@@ -22,8 +22,10 @@
 #include <utility>
 #include <vector>
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
 #include "minddata/dataset/engine/tree_adapter.h"
 #include "minddata/dataset/engine/datasetops/device_queue_op.h"
 #include "minddata/dataset/engine/opt/pre/getter_pass.h"
 #include "minddata/dataset/engine/tree_adapter.h"
 #include "minddata/mindrecord/include/shard_index_generator.h"

 #ifndef ENABLE_ANDROID
 #include "minddata/mindrecord/include/shard_header.h"
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h
@@ -23,6 +23,7 @@
 #include <unordered_map>
 #include <vector>

 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
 #include "minddata/dataset/include/datasets.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
@@ -24,13 +24,26 @@
 #include <utility>
 #include <vector>

 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
 #include "minddata/dataset/engine/data_schema.h"
 #include "minddata/dataset/engine/datasetops/filter_op.h"
 #include "minddata/dataset/engine/datasetops/map_op/map_op.h"
 #include "minddata/dataset/engine/datasetops/project_op.h"
 #include "minddata/dataset/engine/datasetops/repeat_op.h"
 #include "minddata/dataset/engine/datasetops/shuffle_op.h"
 #include "minddata/dataset/engine/datasetops/skip_op.h"
 #include "minddata/dataset/engine/datasetops/take_op.h"
 #include "minddata/dataset/engine/ir/cache/dataset_cache.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/util/path.h"
 #include "minddata/dataset/util/status.h"

 namespace mindspore {
 namespace dataset {

 class Dataset;
 class DatasetCache;
 class SamplerObj;
 class IRNodePass;
 class DatasetSizeGetter;
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h
@@ -22,6 +22,7 @@
 #include <string>
 #include <vector>

 #include "minddata/dataset/engine/datasetops/source/mindrecord_op.h"
 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
@@ -24,6 +24,7 @@
 #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
 #include "minddata/dataset/util/random.h"
 #include "minddata/dataset/util/status.h"

 namespace mindspore {
 namespace dataset {

--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
@@ -22,7 +22,9 @@
 #include <utility>
 #include <vector>

 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
 #include "minddata/dataset/include/samplers.h"

 namespace mindspore {
 namespace dataset {
--- a/mindspore/ccsrc/minddata/dataset/include/config.h
+++ b/mindspore/ccsrc/minddata/dataset/include/config.h
@@ -20,8 +20,6 @@
 #include <cstdint>
 #include <string>

 #include "minddata/dataset/util/log_adapter.h"

 namespace mindspore {
 namespace dataset {

--- a/mindspore/ccsrc/minddata/dataset/include/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/include/constants.h
@@ -59,6 +59,9 @@ enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
 // Possible values for SPieceTokenizerLoadType
 enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };

 // Possible values for SentencePieceModel.
 // Each enumerator is given an explicit numeric value.
 enum class SentencePieceModel {
  kUnigram = 0,
  kBpe = 1,
  kChar = 2,
  kWord = 3
 };

 // Possible values for NormalizeForm
 enum class NormalizeForm {
  kNone = 0,
--- a/mindspore/ccsrc/minddata/dataset/include/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_

 #include <sys/stat.h>
 #include <unistd.h>
 #include <map>
 #include <memory>
@@ -26,27 +27,18 @@
 #include <unordered_set>
 #include <utility>
 #include <vector>
 #include "minddata/dataset/engine/ir/cache/dataset_cache.h"

 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
 #include "minddata/dataset/include/iterator.h"
 #include "minddata/dataset/include/samplers.h"
 #include "minddata/dataset/include/tensor.h"
 #include "minddata/dataset/include/text.h"
 #include "minddata/dataset/include/type_id.h"
 #include "minddata/dataset/kernels/c_func_op.h"
 #include "minddata/dataset/kernels/tensor_op.h"
 #include "minddata/dataset/util/path.h"
 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/text/sentence_piece_vocab.h"
 #include "minddata/dataset/text/vocab.h"
 #endif

 namespace mindspore {
 namespace dataset {

 class Tensor;
 class TensorRow;
 class TensorShape;
 class TreeAdapter;
 class TreeGetters;
@@ -54,6 +46,7 @@ class TreeGetters;
 class Vocab;
 #endif

 class DatasetCache;
 class DatasetNode;

 class Iterator;
@@ -77,12 +70,20 @@ class ConcatDataset;
 class RenameDataset;
 #endif

 #ifndef ENABLE_ANDROID
 class SentencePieceVocab;
 enum class SentencePieceModel;
 #endif

 class DSCallback;

 class RepeatDataset;

 #ifndef ENABLE_ANDROID
 class SkipDataset;
 class TakeDataset;
 class ZipDataset;

 #endif

 /// \class Dataset datasets.h
@@ -969,8 +970,12 @@ std::shared_ptr<TFRecordDataset> TFRecord(const std::vector<std::string> &datase
  } else {
    std::string schema_path = schema;
    if (!schema_path.empty()) {
      Path schema_file(schema_path);
      if (!schema_file.Exists()) {
      struct stat sb;
      int rc = stat(common::SafeCStr(schema_path), &sb);
      if (rc == -1 && errno != ENOENT) {
        MS_LOG(WARNING) << "Unable to query the status of [" << schema_path << "]. Errno = " << errno << ".";
      }
      if (rc != 0) {
        MS_LOG(ERROR) << "TFRecordDataset: schema path [" << schema_path << "] is invalid or does not exist.";
        return nullptr;
      }
--- a/mindspore/ccsrc/minddata/dataset/include/de_tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/include/de_tensor.h
@@ -14,14 +14,14 @@
 * limitations under the License.
 */

 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_API_DETENSOR_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_API_DETENSOR_H_
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DETENSOR_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DETENSOR_H_
 #include <string>
 #include <vector>
 #include <memory>
 #include "include/ms_tensor.h"
 #include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/tensor.h"
 #include "minddata/dataset/util/status.h"
 namespace mindspore {
 namespace tensor {
 class DETensor : public mindspore::tensor::MSTensor {
@@ -79,4 +79,4 @@ class DETensor : public mindspore::tensor::MSTensor {
 };
 }  // namespace tensor
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_API_DETENSOR_H_
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DETENSOR_H_
--- a/mindspore/ccsrc/minddata/dataset/include/execute.h
+++ b/mindspore/ccsrc/minddata/dataset/include/execute.h
@@ -14,12 +14,13 @@
 * limitations under the License.
 */

 #ifndef DATASET_API_EXECUTE_H_
 #define DATASET_API_EXECUTE_H_
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_

 #include <vector>
 #include <memory>
 #include "minddata/dataset/core/constants.h"

 #include "minddata/dataset/include/constants.h"
 #ifdef ENABLE_ANDROID
 #include "minddata/dataset/include/de_tensor.h"
 #endif
@@ -55,4 +56,4 @@ class Execute {

 }  // namespace dataset
 }  // namespace mindspore
 #endif  // DATASET_API_EXECUTE_H_
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
--- a/mindspore/ccsrc/minddata/dataset/include/iterator.h
+++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h
@@ -21,7 +21,6 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "minddata/dataset/engine/runtime_context.h"
 #include "minddata/dataset/include/status.h"

 namespace mindspore {
@@ -45,10 +44,10 @@ using TensorVec = std::vector<std::shared_ptr<Tensor>>;
 class Iterator {
 public:
  /// \brief Constructor
  Iterator() : consumer_(nullptr) {}
  Iterator();

  /// \brief Destructor
  ~Iterator() { Stop(); }
  ~Iterator();

  /// \brief Method for building and launching the pipeline.
  /// \param[in] ops - a vector of DatasetOp in the data pipeline.
--- a/mindspore/ccsrc/minddata/dataset/include/samplers.h
+++ b/mindspore/ccsrc/minddata/dataset/include/samplers.h
@@ -21,10 +21,11 @@
 #include <string>
 #include <vector>

 #include "minddata/dataset/util/status.h"
 #include "minddata/dataset/include/status.h"
 #ifndef ENABLE_ANDROID
 #include "minddata/mindrecord/include/shard_column.h"
 #include "minddata/mindrecord/include/shard_error.h"
 #include "minddata/mindrecord/include/shard_operator.h"
 #include "minddata/mindrecord/include/shard_reader.h"
 #endif

--- a/mindspore/ccsrc/minddata/dataset/include/status.h
+++ b/mindspore/ccsrc/minddata/dataset/include/status.h
@@ -51,6 +51,13 @@ namespace dataset {
    }                                                                      \
  } while (false)

 // Returns a Status with code StatusCode::kSyntaxError from the enclosing function when _condition is false.
 // _e is forwarded as the last Status constructor argument; __LINE__ and __FILE__ record the expansion site.
 // The do { ... } while (false) wrapper lets the macro be used as a single statement terminated by ';'.
 #define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e)                 \
  do {                                                                 \
    if (!(_condition)) {                                               \
      return Status(StatusCode::kSyntaxError, __LINE__, __FILE__, _e); \
    }                                                                  \
  } while (false)

 #define RETURN_UNEXPECTED_IF_NULL(_ptr)                                         \
  do {                                                                          \
    if ((_ptr) == nullptr) {                                                    \
@@ -71,6 +78,15 @@ namespace dataset {
    return Status(StatusCode::kSyntaxError, __LINE__, __FILE__, _e); \
  } while (false)

 // Evaluates _s once into a local Status; if it reports an error, logs it with MS_LOG(ERROR) and
 // returns _r from the enclosing function. Nothing happens when the Status is not an error.
 // NOTE(review): presumably meant for functions whose return type is not Status — confirm against callers.
 // NOTE(review): the local name __rc contains a double underscore, which the C++ standard reserves for
 // the implementation; consider renaming it if this macro is revisited.
 #define RETURN_SECOND_IF_ERROR(_s, _r) \
  do {                                 \
    Status __rc = (_s);                \
    if (__rc.IsError()) {              \
      MS_LOG(ERROR) << __rc;           \
      return _r;                       \
    }                                  \
  } while (false)

 enum class StatusCode : char {
  kOK = 0,
  kOutOfMemory = 1,
@@ -151,6 +167,12 @@ class Status {
  StatusCode code_;
  std::string err_msg_;
 };

 // The declarations below are only compiled when neither _WIN32 nor _WIN64 is defined.
 #if !defined(_WIN32) && !defined(_WIN64)
 // Memory usage threshold constant.
 // NOTE(review): 0.95 presumably means 95% and is compared against GetMemoryUsage() — confirm.
 const float MAX_MEMORY_USAGE_THRESHOLD = 0.95;

 // Declaration only; the definition is not part of this header.
 float GetMemoryUsage();
 #endif
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
--- a/mindspore/ccsrc/minddata/dataset/include/text.h
+++ b/mindspore/ccsrc/minddata/dataset/include/text.h
@@ -22,18 +22,16 @@
 #include <utility>
 #include <vector>

 #include "mindspore/ccsrc/minddata/dataset/core/data_type.h"
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/include/constants.h"
 #include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/util/status.h"

 #include "minddata/dataset/text/kernels/sentence_piece_tokenizer_op.h"
 #include "minddata/dataset/text/sentence_piece_vocab.h"
 #include "minddata/dataset/text/vocab.h"

 namespace mindspore {
 namespace dataset {

 class Vocab;
 class SentencePieceVocab;

 // Transform operations for text
 namespace text {

@@ -146,10 +144,11 @@ std::shared_ptr<JiebaTokenizerOperation> JiebaTokenizer(const std::string &hmm_p
 /// \param[in] vocab a Vocab object.
 /// \param[in] unknown_token word to use for lookup if the word being looked up is out of Vocabulary (oov).
 ///   If unknown_token is oov, runtime error will be thrown.
 /// \param[in] DataType type of the tensor after lookup, typically int32.
 /// \param[in] data_type type of the tensor after lookup, typically int32.
 /// \return Shared pointer to the current TensorOperation.

 std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
                                        const mindspore::dataset::DataType &data_type = DataType("int32"));
                                        const std::string &data_type = "int32");

 /// \brief TensorOp to generate n-gram from a 1-D string Tensor.
 /// \param[in] ngrams ngrams is a vector of positive integers. For example, if ngrams={4, 3}, then the result
@@ -226,9 +225,9 @@ std::shared_ptr<SlidingWindowOperation> SlidingWindow(const int32_t width, const
 ///   https://en.cppreference.com/w/cpp/string/basic_string/stof,
 ///   https://en.cppreference.com/w/cpp/string/basic_string/stoul,
 ///   except that any strings which represent negative numbers cannot be cast to an unsigned integer type.
 /// \param[in] data_type DataType of the tensor to be casted to. Must be a numeric type.
 /// \param[in] data_type Type of the tensor to cast to. Must be a numeric type.
 /// \return Shared pointer to the current TensorOperation.
 std::shared_ptr<ToNumberOperation> ToNumber(const DataType data_type);
 std::shared_ptr<ToNumberOperation> ToNumber(const std::string &data_type);

 /// \brief Truncate a pair of rank-1 tensors such that the total length is less than max_length.
 /// \param[in] max_length Maximum length required.
@@ -285,7 +284,7 @@ class BertTokenizerOperation : public TensorOperation {
                         bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token,
                         bool with_offsets);

  ~BertTokenizerOperation() = default;
  ~BertTokenizerOperation();

  std::shared_ptr<TensorOp> Build() override;

@@ -342,9 +341,9 @@ class JiebaTokenizerOperation : public TensorOperation {
 class LookupOperation : public TensorOperation {
 public:
  explicit LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
                           const DataType &data_type);
                           const std::string &data_type);

  ~LookupOperation() = default;
  ~LookupOperation();

  std::shared_ptr<TensorOp> Build() override;

@@ -356,7 +355,7 @@ class LookupOperation : public TensorOperation {
  std::shared_ptr<Vocab> vocab_;
  std::string unknown_token_;
  int32_t default_id_;
  DataType data_type_;
  std::string data_type_;
 };

 class NgramOperation : public TensorOperation {
@@ -439,7 +438,7 @@ class SentencePieceTokenizerOperation : public TensorOperation {

  SentencePieceTokenizerOperation(const std::string &vocab_path, SPieceTokenizerOutType out_type);

  ~SentencePieceTokenizerOperation() = default;
  ~SentencePieceTokenizerOperation();

  std::shared_ptr<TensorOp> Build() override;

@@ -473,7 +472,7 @@ class SlidingWindowOperation : public TensorOperation {

 class ToNumberOperation : public TensorOperation {
 public:
  explicit ToNumberOperation(DataType data_type);
  explicit ToNumberOperation(std::string data_type);

  ~ToNumberOperation() = default;

@@ -484,7 +483,7 @@ class ToNumberOperation : public TensorOperation {
  std::string Name() const override { return kToNumberOperation; }

 private:
  DataType data_type_;
  std::string data_type_;
 };

 class TruncateSequencePairOperation : public TensorOperation {
--- a/mindspore/ccsrc/minddata/dataset/include/type_id.h
+++ b/mindspore/ccsrc/minddata/dataset/include/type_id.h
@@ -16,7 +16,6 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_

 #include "minddata/dataset/core/data_type.h"
 #include "mindspore/core/ir/dtype/type_id.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/text/sentence_piece_vocab.h
+++ b/mindspore/ccsrc/minddata/dataset/text/sentence_piece_vocab.h
@@ -22,10 +22,11 @@
 #include <vector>
 #include <unordered_map>
 #include "minddata/dataset/util/status.h"
 #include "minddata/dataset/include/constants.h"

 namespace mindspore {
 namespace dataset {
 enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };

 class SentencePieceVocab {
 public:
  static Status BuildFromFile(const std::vector<std::string> &path_list, const int vocab_size,
--- a/tests/ut/cpp/dataset/build_vocab_test.cc
+++ b/tests/ut/cpp/dataset/build_vocab_test.cc
@@ -22,6 +22,7 @@
 #include "common/common.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/status.h"
 #include "minddata/dataset/text/vocab.h"

 using mindspore::dataset::Tensor;
 using mindspore::dataset::Status;
--- a/tests/ut/cpp/dataset/c_api_cache_test.cc
+++ b/tests/ut/cpp/dataset/c_api_cache_test.cc
@@ -17,8 +17,6 @@
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/vision.h"

 #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"

 using namespace mindspore::dataset;

 // Helper function to get the session id from SESSION_ID env variable
@@ -28,7 +26,6 @@ class MindDataTestCacheOp : public UT::DatasetOpTesting {
 public:
  void SetUp() override {
    DatasetOpTesting::SetUp();
    GlobalInit();
  }
 };

--- a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
+++ b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
@@ -15,6 +15,7 @@
 */
 #include "common/common.h"
 #include "minddata/dataset/core/tensor_row.h"
 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/vision.h"

--- a/tests/ut/cpp/dataset/c_api_samplers_test.cc
+++ b/tests/ut/cpp/dataset/c_api_samplers_test.cc
@@ -14,6 +14,7 @@
 * limitations under the License.
 */
 #include "common/common.h"
 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
 #include "minddata/dataset/include/datasets.h"

 using namespace mindspore::dataset;
--- a/tests/ut/cpp/dataset/c_api_text_sentence_piece_vocab_test.cc
+++ b/tests/ut/cpp/dataset/c_api_text_sentence_piece_vocab_test.cc
@@ -18,11 +18,12 @@
 #include <string>

 #include "common/common.h"
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/include/constants.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/include/text.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/text/sentence_piece_vocab.h"

 using namespace mindspore::dataset;
 using mindspore::dataset::SentencePieceModel;
--- a/tests/ut/cpp/dataset/c_api_text_test.cc
+++ b/tests/ut/cpp/dataset/c_api_text_test.cc
@@ -21,11 +21,11 @@
 #include "minddata/dataset/include/config.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/include/text.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/text/vocab.h"

 using namespace mindspore::dataset;
 using mindspore::dataset::DataType;
 using mindspore::dataset::ShuffleMode;
 using mindspore::dataset::Status;
 using mindspore::dataset::Tensor;
@@ -1011,7 +1011,7 @@ TEST_F(MindDataTestPipeline, TestToNumberSuccess1) {
  EXPECT_NE(ds, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("int64"));
  std::shared_ptr<TensorOperation> to_number = text::ToNumber("int64");
  EXPECT_NE(to_number, nullptr);

  // Create a Map operation on ds
@@ -1064,7 +1064,7 @@ TEST_F(MindDataTestPipeline, TestToNumberSuccess2) {
  EXPECT_NE(ds, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("float64"));
  std::shared_ptr<TensorOperation> to_number = text::ToNumber("float64");
  EXPECT_NE(to_number, nullptr);

  // Create a Map operation on ds
@@ -1117,7 +1117,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail1) {
  EXPECT_NE(ds, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("int8"));
  std::shared_ptr<TensorOperation> to_number = text::ToNumber("int8");
  EXPECT_NE(to_number, nullptr);

  // Create a Map operation on ds
@@ -1167,7 +1167,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail2) {
  EXPECT_NE(ds, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("float16"));
  std::shared_ptr<TensorOperation> to_number = text::ToNumber("float16");
  EXPECT_NE(to_number, nullptr);

  // Create a Map operation on ds
@@ -1213,7 +1213,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail3) {
  EXPECT_NE(ds, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("int64"));
  std::shared_ptr<TensorOperation> to_number = text::ToNumber("int64");
  EXPECT_NE(to_number, nullptr);

  // Create a Map operation on ds
@@ -1246,7 +1246,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail3) {

 TEST_F(MindDataTestPipeline, TestToNumberFail4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberFail4.";
  // Test ToNumber with non numerical DataType
  // Test ToNumber with non numerical data type

  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";

@@ -1255,15 +1255,15 @@ TEST_F(MindDataTestPipeline, TestToNumberFail4) {
  EXPECT_NE(ds, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number1 = text::ToNumber(DataType("string"));
  std::shared_ptr<TensorOperation> to_number1 = text::ToNumber("string");

  // Expect failure: invalid parameter with non numerical DataType
  // Expect failure: invalid parameter with non numerical data type
  EXPECT_EQ(to_number1, nullptr);

  // Create ToNumber operation on ds
  std::shared_ptr<TensorOperation> to_number2 = text::ToNumber(DataType("bool"));
  std::shared_ptr<TensorOperation> to_number2 = text::ToNumber("bool");

  // Expect failure: invalid parameter with non numerical DataType
  // Expect failure: invalid parameter with non numerical data type
  EXPECT_EQ(to_number2, nullptr);
 }

--- a/tests/ut/cpp/dataset/c_api_text_vocab_test.cc
+++ b/tests/ut/cpp/dataset/c_api_text_vocab_test.cc
@@ -20,8 +20,9 @@
 #include "common/common.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/include/text.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/text/vocab.h"

 using namespace mindspore::dataset;
 using mindspore::dataset::DataType;
@@ -49,7 +50,7 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOp) {
  EXPECT_EQ(s, Status::OK());

  // Create Lookup operation on ds
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "<unk>", DataType("int32"));
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "<unk>", "int32");
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
@@ -93,7 +94,7 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpEmptyString) {
  EXPECT_EQ(s, Status::OK());

  // Create Lookup operation on ds
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "", DataType("int32"));
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "", "int32");
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
@@ -137,7 +138,7 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpFail1) {

  // Create lookup op for ds
  // Expected failure: "<unk>" is not a word of vocab
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "<unk>", DataType("int32"));
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "<unk>", "int32");
  EXPECT_EQ(lookup, nullptr);
 }

@@ -148,7 +149,7 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpFail2) {

  // Create lookup op
  // Expected failure: vocab is null
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "", DataType("int32"));
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "", "int32");
  EXPECT_EQ(lookup, nullptr);
 }

@@ -170,7 +171,7 @@ TEST_F(MindDataTestPipeline, TestVocabFromDataset) {
  EXPECT_EQ(home_index, 4);

  // Create Lookup operation on ds
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "<unk>", DataType("int32"));
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "<unk>", "int32");
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
@@ -324,7 +325,7 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetInt64) {
  EXPECT_EQ(home_index, 2);

  // Create Lookup operation on ds
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "home", DataType("int64"));
  std::shared_ptr<TensorOperation> lookup = text::Lookup(vocab, "home", "int64");
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
--- a/tests/ut/cpp/dataset/ir_callback_test.cc
+++ b/tests/ut/cpp/dataset/ir_callback_test.cc
@@ -21,6 +21,7 @@
 #include "minddata/dataset/callback/ds_callback.h"
 #include "minddata/dataset/core/client.h"
 #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
 #include "minddata/dataset/engine/tree_adapter.h"
 #include "minddata/dataset/include/datasets.h"
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/kernels/data/no_op.h"