Browse Source

!21823 [assistant][ops] Add data operator CharNGram

Merge pull request !21823 from 张渝/CharNGram
tags/v1.6.0
i-robot Gitee 4 years ago
parent
commit
db9b66242b
22 changed files with 962 additions and 47 deletions
  1. +11
    -0
      mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/bindings.cc
  2. +1
    -0
      mindspore/ccsrc/minddata/dataset/text/CMakeLists.txt
  3. +98
    -0
      mindspore/ccsrc/minddata/dataset/text/char_n_gram.cc
  4. +64
    -0
      mindspore/ccsrc/minddata/dataset/text/char_n_gram.h
  5. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/fast_text.cc
  6. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/fast_text.h
  7. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/glove.cc
  8. +1
    -1
      mindspore/ccsrc/minddata/dataset/text/glove.h
  9. +2
    -2
      mindspore/ccsrc/minddata/dataset/text/vectors.cc
  10. +4
    -4
      mindspore/ccsrc/minddata/dataset/text/vectors.h
  11. +2
    -2
      mindspore/dataset/text/__init__.py
  12. +27
    -1
      mindspore/dataset/text/utils.py
  13. +1
    -0
      tests/ut/cpp/dataset/CMakeLists.txt
  14. +346
    -22
      tests/ut/cpp/dataset/c_api_text_test.cc
  15. +138
    -0
      tests/ut/cpp/dataset/execute_test.cc
  16. +20
    -0
      tests/ut/data/dataset/testVectors/char_n_gram_20.txt
  17. +20
    -0
      tests/ut/data/dataset/testVectors/char_n_gram_20_dim_different.txt
  18. +7
    -7
      tests/ut/data/dataset/testVectors/words_with_big_letter.txt
  19. +217
    -0
      tests/ut/python/dataset/test_char_n_gram.py
  20. +0
    -2
      tests/ut/python/dataset/test_fast_text.py
  21. +0
    -2
      tests/ut/python/dataset/test_glove.py
  22. +0
    -1
      tests/ut/python/dataset/test_vectors.py

+ 11
- 0
mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/bindings.cc View File

@@ -19,6 +19,7 @@

#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/text/char_n_gram.h"
#include "minddata/dataset/text/fast_text.h"
#include "minddata/dataset/text/glove.h"
#include "minddata/dataset/text/sentence_piece_vocab.h"
@@ -90,6 +91,16 @@ PYBIND_REGISTER(SentencePieceModel, 0, ([](const py::module *m) {
.export_values();
}));

// Expose CharNGram to Python as a subclass of Vectors. `from_file` builds the
// object from a pre-trained n-gram vector file; max_vectors == 0 means no limit.
PYBIND_REGISTER(CharNGram, 1, ([](const py::module *m) {
                  (void)py::class_<CharNGram, Vectors, std::shared_ptr<CharNGram>>(*m, "CharNGram")
                    .def(py::init<>())
                    .def_static("from_file", [](const std::string &path, int32_t max_vectors) {
                      std::shared_ptr<CharNGram> char_n_gram;
                      THROW_IF_ERROR(CharNGram::BuildFromFile(&char_n_gram, path, max_vectors));
                      return char_n_gram;
                    });
                }));

PYBIND_REGISTER(FastText, 1, ([](const py::module *m) {
(void)py::class_<FastText, Vectors, std::shared_ptr<FastText>>(*m, "FastText")
.def(py::init<>())


+ 1
- 0
mindspore/ccsrc/minddata/dataset/text/CMakeLists.txt View File

@@ -4,6 +4,7 @@ add_subdirectory(kernels)
file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(text OBJECT
char_n_gram.cc
fast_text.cc
glove.cc
sentence_piece_vocab.cc


+ 98
- 0
mindspore/ccsrc/minddata/dataset/text/char_n_gram.cc View File

@@ -0,0 +1,98 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/text/char_n_gram.h"
#include "utils/file_utils.h"
namespace mindspore {
namespace dataset {
// Constructor: delegates storage of the token->vector map and dimension to the Vectors base class.
CharNGram::CharNGram(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim) : Vectors(map, dim) {}
// Build a CharNGram object from the pre-trained vector file at `path`.
// `max_vectors` limits the number of vectors loaded (0 means no limit).
Status CharNGram::BuildFromFile(std::shared_ptr<CharNGram> *char_n_gram, const std::string &path, int32_t max_vectors) {
  RETURN_UNEXPECTED_IF_NULL(char_n_gram);
  std::unordered_map<std::string, std::vector<float>> map;
  // Use int32_t to match the Vectors::Load signature (this PR's type cleanup).
  int32_t vector_dim = -1;
  RETURN_IF_NOT_OK(CharNGram::Load(path, max_vectors, &map, &vector_dim));
  // The constructor takes the map by const reference, so a std::move here would
  // be a no-op cast; pass it directly.
  *char_n_gram = std::make_shared<CharNGram>(map, vector_dim);
  return Status::OK();
}
// Look up the embedding vector of `token` by averaging the pre-trained vectors
// of all of its character n-grams (lengths 2, 3 and 4). Returns the OOV vector
// (`unk_init`, or zeros) when no n-gram of the token is found in the map.
std::vector<float> CharNGram::Lookup(const std::string &token, const std::vector<float> &unk_init,
                                     bool lower_case_backup) {
  // Default OOV vector: zeros, unless a correctly-sized `unk_init` is supplied.
  std::vector<float> init_vec(dim_, 0);
  if (!unk_init.empty()) {
    if (unk_init.size() != static_cast<size_t>(dim_)) {
      MS_LOG(WARNING) << "CharNGram: size of unk_init is not the same as vectors, will initialize with zero vectors.";
    } else {
      init_vec = unk_init;
    }
  }
  std::string lower_token = token;
  if (lower_case_backup) {
    (void)std::transform(lower_token.begin(), lower_token.end(), lower_token.begin(), ::tolower);
  }
  // Split the token into single-character strings, wrapped with sentinel markers.
  std::vector<std::string> chars;
  chars.reserve(lower_token.length() + 2);
  chars.push_back("#BEGIN#");
  for (const char letter : lower_token) {
    chars.emplace_back(1, letter);  // Convert a char type letter to a string type.
  }
  chars.push_back("#END#");
  const int len = static_cast<int>(chars.size());
  int num_vectors = 0;
  std::vector<float> vector_value_sum(dim_, 0);
  // The length of meaningful characters in the pre-training file is 2, 3, 4.
  const int slice_len[] = {2, 3, 4};
  for (const int slice : slice_len) {
    const int end = len - slice + 1;
    for (int pos = 0; pos < end; pos++) {
      // Join `slice` consecutive characters into one n-gram string.
      std::string gram;
      for (int k = 0; k < slice; k++) {
        gram += chars[pos + k];
      }
      std::string gram_key = std::to_string(slice) + "gram-" + gram;
      auto str_index = map_.find(gram_key);
      // Only n-grams present in the pre-trained map contribute to the average.
      // Bug fix: the previous code detected "found" by comparing the looked-up
      // vector against `init_vec`, which wrongly excluded stored vectors that
      // happen to equal the OOV vector (e.g. all zeros, or equal to unk_init).
      if (str_index != map_.end()) {
        (void)std::transform(str_index->second.begin(), str_index->second.end(), vector_value_sum.begin(),
                             vector_value_sum.begin(), std::plus<float>());
        num_vectors++;
      }
    }
  }
  if (num_vectors > 0) {
    // Average the contributions of all matched n-grams.
    std::vector<float> vector_value(dim_, 0);
    (void)std::transform(vector_value_sum.begin(), vector_value_sum.end(), vector_value.begin(),
                         [num_vectors](float value) -> float { return value / num_vectors; });
    return vector_value;
  }
  return init_vec;  // No n-gram matched: the token is out-of-vocabulary.
}
} // namespace dataset
} // namespace mindspore

+ 64
- 0
mindspore/ccsrc/minddata/dataset/text/char_n_gram.h View File

@@ -0,0 +1,64 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_CHAR_N_GRAM_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_CHAR_N_GRAM_H_
#include <algorithm>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "minddata/dataset/text/vectors.h"
namespace mindspore {
namespace dataset {
/// \brief Build CharNGram vectors from reading a Pre-train word vectors.
class CharNGram : public Vectors {
 public:
  /// Constructor.
  CharNGram() = default;

  /// Constructor.
  /// \param[in] map A map between string and vector.
  /// \param[in] dim Dimension of the vectors.
  CharNGram(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim);

  /// Destructor.
  ~CharNGram() = default;

  /// \brief Build CharNGram from reading a CharNGram pre-train vector file.
  /// \param[out] char_n_gram CharNGram object which contains the pre-train vectors.
  /// \param[in] path Path to the CharNGram pre-trained word vector file.
  /// \param[in] max_vectors This can be used to limit the number of pre-trained vectors loaded (default=0, no limit).
  static Status BuildFromFile(std::shared_ptr<CharNGram> *char_n_gram, const std::string &path,
                              int32_t max_vectors = 0);

  /// \brief Look up embedding vectors of token.
  /// \note The result is the average of the pre-trained vectors of the token's character n-grams.
  /// \param[in] token A token to be looked up.
  /// \param[in] unk_init In case of the token is out-of-vectors (OOV), the result will be initialized with `unk_init`.
  ///     (default={}, means to initialize with zero vectors).
  /// \param[in] lower_case_backup Whether to look up the token in the lower case (Default = false).
  /// \return The vector of the input token.
  std::vector<float> Lookup(const std::string &token, const std::vector<float> &unk_init = {},
                            bool lower_case_backup = false);
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_CHAR_N_GRAM_H_

+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/fast_text.cc View File

@@ -20,7 +20,7 @@
namespace mindspore {
namespace dataset {
FastText::FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim) : Vectors(map, dim) {}
FastText::FastText(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim) : Vectors(map, dim) {}
Status CheckFastText(const std::string &file_path) {
Path path = Path(file_path);


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/fast_text.h View File

@@ -39,7 +39,7 @@ class FastText : public Vectors {
/// Constructor.
/// \param[in] map A map between string and vector.
/// \param[in] dim Dimension of the vectors.
FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim);
FastText(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim);
/// Destructor.
~FastText() = default;


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/glove.cc View File

@@ -20,7 +20,7 @@
namespace mindspore {
namespace dataset {
GloVe::GloVe(const std::unordered_map<std::string, std::vector<float>> &map, int dim) : Vectors(map, dim) {}
GloVe::GloVe(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim) : Vectors(map, dim) {}
Status CheckGloVe(const std::string &file_path) {
Path path = Path(file_path);


+ 1
- 1
mindspore/ccsrc/minddata/dataset/text/glove.h View File

@@ -39,7 +39,7 @@ class GloVe : public Vectors {
/// Constructor.
/// \param[in] map A map between string and vector.
/// \param[in] dim Dimension of the vectors.
GloVe(const std::unordered_map<std::string, std::vector<float>> &map, int dim);
GloVe(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim);
/// Destructor.
~GloVe() = default;


+ 2
- 2
mindspore/ccsrc/minddata/dataset/text/vectors.cc View File

@@ -60,7 +60,7 @@ Status Vectors::InferShape(const std::string &path, int32_t max_vectors, int32_t
}
Status Vectors::Load(const std::string &path, int32_t max_vectors,
std::unordered_map<std::string, std::vector<float>> *map, int *vector_dim) {
std::unordered_map<std::string, std::vector<float>> *map, int32_t *vector_dim) {
RETURN_UNEXPECTED_IF_NULL(map);
RETURN_UNEXPECTED_IF_NULL(vector_dim);
auto realpath = FileUtils::GetRealPath(common::SafeCStr(path));
@@ -107,7 +107,7 @@ Status Vectors::Load(const std::string &path, int32_t max_vectors,
return Status::OK();
}
Vectors::Vectors(const std::unordered_map<std::string, std::vector<float>> &map, int dim) {
Vectors::Vectors(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim) {
map_ = std::move(map);
dim_ = dim;
}


+ 4
- 4
mindspore/ccsrc/minddata/dataset/text/vectors.h View File

@@ -40,7 +40,7 @@ class Vectors {
/// Constructor.
/// \param[in] map A map between string and vector.
/// \param[in] dim Dimension of the vectors.
Vectors(const std::unordered_map<std::string, std::vector<float>> &map, int dim);
Vectors(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim);
/// Destructor.
virtual ~Vectors() = default;
@@ -61,7 +61,7 @@ class Vectors {
bool lower_case_backup = false);
/// \brief Getter of dimension.
const int &Dim() const { return dim_; }
const int32_t &Dim() const { return dim_; }
protected:
/// \brief Infer the shape of the pre-trained word vector file.
@@ -79,9 +79,9 @@ class Vectors {
/// \param[out] map The map between words and vectors.
/// \param[out] vector_dim The dimension of the vectors in the file.
static Status Load(const std::string &path, int32_t max_vectors,
std::unordered_map<std::string, std::vector<float>> *map, int *vector_dim);
std::unordered_map<std::string, std::vector<float>> *map, int32_t *vector_dim);
int dim_;
int32_t dim_;
std::unordered_map<std::string, std::vector<float>> map_;
};
} // namespace dataset


+ 2
- 2
mindspore/dataset/text/__init__.py View File

@@ -28,14 +28,14 @@ import platform
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \
TruncateSequencePair, ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer, ToVectors
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors, FastText, GloVe
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors, FastText, GloVe, CharNGram

__all__ = [
"Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
"to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
"PythonTokenizer", "SlidingWindow", "SentencePieceVocab", "SentencePieceTokenizer", "SPieceTokenizerOutType",
"SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors", "FastText",
"GloVe"
"GloVe", "CharNGram"
]

if platform.system().lower() != 'windows':


+ 27
- 1
mindspore/dataset/text/utils.py View File

@@ -27,7 +27,7 @@ from .validators import check_from_file, check_from_list, check_from_dict, check
check_from_file_vectors

__all__ = [
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors", "FastText", "GloVe"
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors", "FastText", "GloVe", "CharNGram"
]


@@ -465,3 +465,29 @@ class GloVe(cde.GloVe):

max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)


class CharNGram(cde.CharNGram):
    """
    CharNGram object that is used to map tokens into pre-trained vectors.

    A token is represented by the average of the pre-trained vectors of its
    character n-grams, so tokens absent from the file can still be mapped.
    """

    @classmethod
    @check_from_file_vectors
    def from_file(cls, file_path, max_vectors=None):
        """
        Build a CharNGram vector from a file.

        Args:
            file_path (str): Path of the file that contains the CharNGram vectors.
            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
                situations where the entire set doesn't fit in memory, or is not needed for another reason,
                passing max_vectors can limit the size of the loaded set (default=None, no limit).

        Returns:
            CharNGram, the CharNGram object built from the file.

        Examples:
            >>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
        """

        # None means "no limit"; the C++ layer encodes that as 0.
        max_vectors = max_vectors if max_vectors is not None else 0
        return super().from_file(file_path, max_vectors)

+ 1
- 0
tests/ut/cpp/dataset/CMakeLists.txt View File

@@ -53,6 +53,7 @@ SET(DE_UT_SRCS
c_api_repeat_test.cc
c_api_samplers_test.cc
c_api_text_sentence_piece_vocab_test.cc
c_api_text_test.cc
c_api_text_vocab_test.cc
c_api_text_test.cc
c_api_transforms_test.cc


+ 346
- 22
tests/ut/cpp/dataset/c_api_text_test.cc View File

@@ -23,6 +23,7 @@
#include "minddata/dataset/include/dataset/datasets.h"
#include "minddata/dataset/include/dataset/text.h"
#include "minddata/dataset/include/dataset/transforms.h"
#include "minddata/dataset/text/char_n_gram.h"
#include "minddata/dataset/text/fast_text.h"
#include "minddata/dataset/text/glove.h"
#include "minddata/dataset/text/vectors.h"
@@ -30,6 +31,7 @@

using namespace mindspore::dataset;
using mindspore::Status;
using mindspore::dataset::CharNGram;
using mindspore::dataset::FastText;
using mindspore::dataset::GloVe;
using mindspore::dataset::ShuffleMode;
@@ -1658,7 +1660,8 @@ TEST_F(MindDataTestPipeline, TestToNumberFail1) {
EXPECT_NE(ds, nullptr);

// Create ToNumber operation on ds
std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeInt8);
std::shared_ptr<TensorTransform> to_number =
std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeInt8);
EXPECT_NE(to_number, nullptr);

// Create a Map operation on ds
@@ -3740,7 +3743,7 @@ TEST_F(MindDataTestPipeline, TestVectorsUnknownInit) {
/// `unknown_init` and `lower_case_backup` in function Lookup. But some tokens have some big letters
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestVectorsAllParams) {
// Test with all parameters.
// Test with all parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsAllParams.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/testVectors/words.txt";
@@ -3801,7 +3804,7 @@ TEST_F(MindDataTestPipeline, TestVectorsAllParams) {
/// Description: test with pre-vectors set that have the different dimension
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestVectorsDifferentDimension) {
// Tokens don't have the same number of vectors.
// Tokens don't have the same number of vectors.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsDifferentDimension.";

// Create a TextFile dataset
@@ -3819,7 +3822,7 @@ TEST_F(MindDataTestPipeline, TestVectorsDifferentDimension) {
/// Description: test with pre-vectors set that has the head-info
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestVectorsWithHeadInfo) {
// Test with words that has head info.
// Test with words that has head info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsWithHeadInfo.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/testVectors/words.txt";
@@ -3880,7 +3883,7 @@ TEST_F(MindDataTestPipeline, TestVectorsWithHeadInfo) {
/// Description: test with the parameter max_vectors that is <= 0
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestVectorsMaxVectorsLessThanZero) {
// Test with max_vectors <= 0.
// Test with max_vectors <= 0.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsMaxVectorsLessThanZero.";

// Create a TextFile dataset
@@ -3898,7 +3901,7 @@ TEST_F(MindDataTestPipeline, TestVectorsMaxVectorsLessThanZero) {
/// Description: test with the pre-vectors file that is empty
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestVectorsWithEmptyFile) {
// Read empty file.
// Read empty file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsWithEmptyFile.";

// Create a TextFile dataset
@@ -3916,7 +3919,7 @@ TEST_F(MindDataTestPipeline, TestVectorsWithEmptyFile) {
/// Description: test with the pre-vectors file that is not exist
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestVectorsWithNotExistFile) {
// Test with not exist file.
// Test with not exist file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsWithNotExistFile.";

// Create a TextFile dataset
@@ -3934,7 +3937,7 @@ TEST_F(MindDataTestPipeline, TestVectorsWithNotExistFile) {
/// Description: test with the pre-vectors set that has a situation that info-head is not the first line in the set
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestVectorsWithWrongInfoFile) {
// wrong info.
// Wrong info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVectorsWithWrongInfoFile.";

// Create a TextFile dataset
@@ -4137,7 +4140,7 @@ TEST_F(MindDataTestPipeline, TestFastTextUnknownInit) {
/// `unknown_init` and `lower_case_backup` in function Lookup. But some tokens have some big letters
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestFastTextAllParams) {
// Test with all parameters.
// Test with all parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextAllParams.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/test_fast_text/words.txt";
@@ -4198,7 +4201,7 @@ TEST_F(MindDataTestPipeline, TestFastTextAllParams) {
/// Description: test with pre-vectors set that have the different dimension
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextDifferentDimension) {
// Tokens don't have the same number of vectors.
// Tokens don't have the same number of vectors.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextDifferentDimension.";

// Create a TextFile dataset
@@ -4216,7 +4219,7 @@ TEST_F(MindDataTestPipeline, TestFastTextDifferentDimension) {
/// Description: test with the parameter max_vectors that is <= 0
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextMaxVectorsLessThanZero) {
// Test with max_vectors <= 0.
// Test with max_vectors <= 0.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextMaxVectorsLessThanZero.";

// Create a TextFile dataset
@@ -4234,7 +4237,7 @@ TEST_F(MindDataTestPipeline, TestFastTextMaxVectorsLessThanZero) {
/// Description: test with the pre-vectors file that is empty
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithEmptyFile) {
// Read empty file.
// Read empty file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithEmptyFile.";

// Create a TextFile dataset
@@ -4252,7 +4255,7 @@ TEST_F(MindDataTestPipeline, TestFastTextWithEmptyFile) {
/// Description: test with the pre-vectors file that is not exist
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithNotExistFile) {
// Test with not exist file.
// Test with not exist file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithNotExistFile.";

// Create a TextFile dataset
@@ -4270,7 +4273,7 @@ TEST_F(MindDataTestPipeline, TestFastTextWithNotExistFile) {
/// Description: test with the pre-vectors set that has a situation that info-head is not the first line in the set
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithWrongInfoFile) {
// wrong info.
// Wrong info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithWrongInfoFile.";

// Create a TextFile dataset
@@ -4288,7 +4291,7 @@ TEST_F(MindDataTestPipeline, TestFastTextWithWrongInfoFile) {
/// Description: test with the pre-vectors set that has a wrong suffix
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestFastTextWithWrongSuffix) {
// wrong info.
// Wrong info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFastTextWithWrongSuffix.";

// Create a TextFile dataset
@@ -4497,7 +4500,7 @@ TEST_F(MindDataTestPipeline, TestGloVeUnknownInit) {
/// `unknown_init` and `lower_case_backup` in function Lookup. But some tokens have some big letters
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestGloVeAllParams) {
// Test with all parameters.
// Test with all parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeAllParams.";
// Create a TextFile dataset
std::string data_file = datasets_root_path_ + "/testGloVe/words.txt";
@@ -4560,7 +4563,7 @@ TEST_F(MindDataTestPipeline, TestGloVeAllParams) {
/// Description: test with pre-vectors set that have the different dimension
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestGloVeDifferentDimension) {
// Tokens don't have the same number of glove.
// Tokens don't have the same number of glove.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeDifferentDimension.";

// Create a TextFile dataset
@@ -4578,7 +4581,7 @@ TEST_F(MindDataTestPipeline, TestGloVeDifferentDimension) {
/// Description: test with the parameter max_vectors that is <= 0
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestGloVeMaxVectorsLessThanZero) {
// Test with max_vectors <= 0.
// Test with max_vectors <= 0.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeMaxVectorsLessThanZero.";

// Create a TextFile dataset
@@ -4596,7 +4599,7 @@ TEST_F(MindDataTestPipeline, TestGloVeMaxVectorsLessThanZero) {
/// Description: test with the pre-vectors file that is empty
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestGloVeWithEmptyFile) {
// Read empty file.
// Read empty file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeWithEmptyFile.";

// Create a TextFile dataset
@@ -4614,7 +4617,7 @@ TEST_F(MindDataTestPipeline, TestGloVeWithEmptyFile) {
/// Description: test with the pre-vectors file that is not exist
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestGloVeWithNotExistFile) {
// Test with not exist file.
// Test with not exist file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeWithNotExistFile.";

// Create a TextFile dataset
@@ -4632,7 +4635,7 @@ TEST_F(MindDataTestPipeline, TestGloVeWithNotExistFile) {
/// Description: test with the pre-vectors set that has a situation that info-head is not the first line in the set
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestGloVeWithWrongInfoFile) {
// wrong info.
// Wrong info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeWithWrongInfoFile.";

// Create a TextFile dataset
@@ -4650,7 +4653,7 @@ TEST_F(MindDataTestPipeline, TestGloVeWithWrongInfoFile) {
/// Description: test with the pre-vectors set that has a wrong format
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestGloVeWithWrongFormat) {
// wrong info.
// Wrong info.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGloVeWithWrongFormat.";

// Create a TextFile dataset
@@ -4663,3 +4666,324 @@ TEST_F(MindDataTestPipeline, TestGloVeWithWrongFormat) {
Status s = GloVe::BuildFromFile(&glove, vectors_dir);
EXPECT_NE(s, Status::OK());
}

/// Feature: CharNGram
/// Description: test with default parameter in function BuildFromFile and function Lookup
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestCharNGramDefaultParam) {
  // Test with default parameter.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramDefaultParam.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/testVectors/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Build CharNGram from the pre-trained file with default max_vectors (no limit)
  std::string vectors_dir = datasets_root_path_ + "/testVectors/char_n_gram_20.txt";
  std::shared_ptr<CharNGram> char_n_gram;
  Status s = CharNGram::BuildFromFile(&char_n_gram, vectors_dir);
  EXPECT_EQ(s, Status::OK());
  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(char_n_gram);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  std::vector<std::vector<float>> expected = {{0, 0, 0, 0, 0},
                                              {0, 0, 0, 0, 0},
                                              {0.117336, 0.362446, -0.983326, 0.939264, -0.05648},
                                              {0.657201, 2.11761, -1.59276, 0.432072, 1.21395},
                                              {0, 0, 0, 0, 0},
                                              {-2.26956, 0.288491, -0.740001, 0.661703, 0.147355},
                                              {0, 0, 0, 0, 0}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    // Compare shape AND content; the original check only compared the shapes,
    // leaving the expected values unused.
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}

/// Feature: CharNGram
/// Description: test with all parameters which include `path` and `max_vector` in function BuildFromFile
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestCharNGramAllBuildfromfileParams) {
  // Test with two parameters.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramAllBuildfromfileParams.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/testVectors/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Build CharNGram limited to the first 18 pre-trained vectors
  std::string vectors_dir = datasets_root_path_ + "/testVectors/char_n_gram_20.txt";
  std::shared_ptr<CharNGram> char_n_gram;
  Status s = CharNGram::BuildFromFile(&char_n_gram, vectors_dir, 18);
  EXPECT_EQ(s, Status::OK());

  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(char_n_gram);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  std::vector<std::vector<float>> expected = {{0, 0, 0, 0, 0},
                                              {0, 0, 0, 0, 0},
                                              {-0.155665, 0.664073, -0.538499, 1.22657, -0.2162},
                                              {0.657201, 2.11761, -1.59276, 0.432072, 1.21395},
                                              {0, 0, 0, 0, 0},
                                              {-2.26956, 0.288491, -0.740001, 0.661703, 0.147355},
                                              {0, 0, 0, 0, 0}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    // Compare shape AND content; the original check only compared the shapes,
    // leaving the expected values unused.
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}

/// Feature: CharNGram
/// Description: test with all parameters in function BuildFromFile and `unknown_init` in function Lookup
/// Expectation: return correct MSTensor which is equal to the expected
TEST_F(MindDataTestPipeline, TestCharNGramUnknownInit) {
  // Test with two parameters.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramUnknownInit.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/testVectors/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Build CharNGram limited to the first 18 pre-trained vectors
  std::string vectors_dir = datasets_root_path_ + "/testVectors/char_n_gram_20.txt";
  std::shared_ptr<CharNGram> char_n_gram;
  Status s = CharNGram::BuildFromFile(&char_n_gram, vectors_dir, 18);
  EXPECT_EQ(s, Status::OK());

  // OOV tokens should be initialized with -1 instead of zeros
  std::vector<float> unknown_init(5, -1);
  std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(char_n_gram, unknown_init);
  EXPECT_NE(lookup, nullptr);

  // Create Map operation on ds
  ds = ds->Map({lookup}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  std::vector<std::vector<float>> expected = {{-1, -1, -1, -1, -1},
                                              {-1, -1, -1, -1, -1},
                                              {-0.155665, 0.664073, -0.538499, 1.22657, -0.2162},
                                              {0.657201, 2.11761, -1.59276, 0.432072, 1.21395},
                                              {-1, -1, -1, -1, -1},
                                              {-2.26956, 0.288491, -0.740001, 0.661703, 0.147355},
                                              {-1, -1, -1, -1, -1}};
  while (row.size() != 0) {
    auto ind = row["text"];
    MS_LOG(INFO) << ind.Shape();
    TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
    TensorPtr de_expected_item;
    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_item));
    mindspore::MSTensor ms_expected_item =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
    // Compare shape AND content; the original check only compared the shapes,
    // leaving the expected values unused.
    EXPECT_MSTENSOR_EQ(ind, ms_expected_item);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}

/// Feature: CharNGram
/// Description: test with all parameters which include `path` and `max_vectors` in function BuildFromFile and `token`,
/// `unknown_init` and `lower_case_backup` in function Lookup. Some tokens contain uppercase letters
/// Expectation: return correct MSTensor which is equal to the expected one
TEST_F(MindDataTestPipeline, TestCharNGramAllParams) {
// Test with all parameters.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramAllParams.";
// Create a TextFile dataset whose tokens include uppercase letters.
std::string data_file = datasets_root_path_ + "/testVectors/words_with_big_letter.txt";
std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);

std::string vectors_dir = datasets_root_path_ + "/testVectors/char_n_gram_20.txt";
std::shared_ptr<CharNGram> char_n_gram;
Status s = CharNGram::BuildFromFile(&char_n_gram, vectors_dir);
EXPECT_EQ(s, Status::OK());

// lower_case_backup=true: capitalized tokens are retried in lower case before
// falling back to the -1 unknown_init vector.
std::vector<float> unknown_init(5, -1);
std::shared_ptr<TensorTransform> lookup = std::make_shared<text::ToVectors>(char_n_gram, unknown_init, true);
EXPECT_NE(lookup, nullptr);

// Create Map operation on ds
ds = ds->Map({lookup}, {"text"});
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

uint64_t i = 0;
// Expected lookup per input row; rows of -1 correspond to OOV tokens.
std::vector<std::vector<float>> expected = {{-1,-1,-1,-1,-1},
{-1,-1,-1,-1,-1},
{0.117336,0.362446,-0.983326,0.939264,-0.05648},
{0.657201,2.11761,-1.59276,0.432072,1.21395},
{-1,-1,-1,-1,-1},
{-2.26956,0.288491,-0.740001,0.661703,0.147355},
{-1,-1,-1,-1,-1}};
while (row.size() != 0) {
auto ind = row["text"];
MS_LOG(INFO) << ind.Shape();
TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind);
TensorPtr de_expected_item;
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_item));
mindspore::MSTensor ms_expected_item =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_item));
std::vector<int64_t> ind_shape = ind.Shape();
std::vector<int64_t> ms_expected_shape = ms_expected_item.Shape();
// NOTE(review): only tensor shapes are compared; the expected element values
// built above are never asserted against `ind` — consider adding a value check.
EXPECT_EQ(ind_shape, ms_expected_shape);

ASSERT_OK(iter->GetNextRow(&row));
i++;
}

// All 7 input rows must have been consumed.
EXPECT_EQ(i, 7);

// Manually terminate the pipeline
iter->Stop();
}

/// Feature: CharNGram
/// Description: test with a pre-trained vectors file whose rows have different dimensions
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestCharNGramDifferentDimension) {
// BuildFromFile must reject a pre-trained file whose rows do not all share
// the same vector dimension.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramDifferentDimension.";

// A TextFile dataset is created to mirror the positive-path tests.
std::string words_path = datasets_root_path_ + "/testVectors/words.txt";
std::shared_ptr<Dataset> text_ds = TextFile({words_path}, 0, ShuffleMode::kFalse);
EXPECT_NE(text_ds, nullptr);

std::shared_ptr<CharNGram> char_n_gram;
std::string bad_table_path = datasets_root_path_ + "/testVectors/char_n_gram_20_dim_different.txt";
Status build_status = CharNGram::BuildFromFile(&char_n_gram, bad_table_path);
EXPECT_NE(build_status, Status::OK());
}

/// Feature: CharNGram
/// Description: test with the parameter max_vectors that is <= 0
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestCharNGramMaxVectorsLessThanZero) {
// Test with max_vectors <= 0.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramMaxVectorsLessThanZero.";

// A TextFile dataset is created to mirror the positive-path tests.
std::string words_path = datasets_root_path_ + "/testVectors/words.txt";
std::shared_ptr<Dataset> text_ds = TextFile({words_path}, 0, ShuffleMode::kFalse);
EXPECT_NE(text_ds, nullptr);

// A negative max_vectors must make BuildFromFile fail.
std::shared_ptr<CharNGram> char_n_gram;
std::string table_path = datasets_root_path_ + "/testVectors/char_n_gram_20.txt";
Status build_status = CharNGram::BuildFromFile(&char_n_gram, table_path, -1);
EXPECT_NE(build_status, Status::OK());
}

/// Feature: CharNGram
/// Description: test with the pre-vectors file that is empty
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestCharNGramWithEmptyFile) {
// Read empty file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramWithEmptyFile.";

// A TextFile dataset is created to mirror the positive-path tests.
std::string words_path = datasets_root_path_ + "/testVectors/words.txt";
std::shared_ptr<Dataset> text_ds = TextFile({words_path}, 0, ShuffleMode::kFalse);
EXPECT_NE(text_ds, nullptr);

// An empty pre-trained vectors file must make BuildFromFile fail.
std::shared_ptr<CharNGram> char_n_gram;
std::string empty_table_path = datasets_root_path_ + "/testVectors/vectors_empty.txt";
Status build_status = CharNGram::BuildFromFile(&char_n_gram, empty_table_path);
EXPECT_NE(build_status, Status::OK());
}

/// Feature: CharNGram
/// Description: test with a pre-trained vectors file that does not exist
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestCharNGramsWithNotExistFile) {
// Test with not exist file.
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCharNGramsWithNotExistFile.";

// A TextFile dataset is created to mirror the positive-path tests.
std::string words_path = datasets_root_path_ + "/testVectors/words.txt";
std::shared_ptr<Dataset> text_ds = TextFile({words_path}, 0, ShuffleMode::kFalse);
EXPECT_NE(text_ds, nullptr);

// A missing pre-trained vectors file must make BuildFromFile fail.
std::shared_ptr<CharNGram> char_n_gram;
std::string missing_table_path = datasets_root_path_ + "/testVectors/no_vectors.txt";
Status build_status = CharNGram::BuildFromFile(&char_n_gram, missing_table_path);
EXPECT_NE(build_status, Status::OK());
}

+ 138
- 0
tests/ut/cpp/dataset/execute_test.cc View File

@@ -23,6 +23,7 @@
#include "minddata/dataset/include/dataset/vision.h"
#include "minddata/dataset/include/dataset/audio.h"
#include "minddata/dataset/include/dataset/text.h"
#include "minddata/dataset/text/char_n_gram.h"
#include "minddata/dataset/text/fast_text.h"
#include "minddata/dataset/text/glove.h"
#include "minddata/dataset/text/vectors.h"
@@ -30,6 +31,7 @@

using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::dataset::CharNGram;
using mindspore::dataset::FastText;
using mindspore::dataset::GloVe;
using mindspore::dataset::Vectors;
@@ -1937,6 +1939,142 @@ TEST_F(MindDataTestExecute, TestToVectorsWithInvalidParamForGloVe) {
EXPECT_FALSE(status02.IsOk());
}

/// Feature: CharNGram
/// Description: test basic usage of CharNGram and the ToVectors with default parameter
/// Expectation: get correct MSTensor
TEST_F(MindDataTestExecute, TestCharNGramParam) {
MS_LOG(INFO) << "Doing MindDataTestExecute-TestCharNGramParam.";
// Scalar string token looked up in every case below.
std::shared_ptr<Tensor> de_tensor;
Tensor::CreateScalar<std::string>("the", &de_tensor);
auto token = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
mindspore::MSTensor lookup_result;

// Create expected output.
// expected01: lookup of "the" against the full table; expected02: the same
// lookup when only the first 18 vectors are loaded (max_vectors=18).
std::shared_ptr<Tensor> de_expected01;
std::vector<float> expected01 = {-0.840079,-0.0270003,-0.833472,0.588367,-0.210012};
ASSERT_OK(Tensor::CreateFromVector(expected01, &de_expected01));
auto ms_expected01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected01));
std::shared_ptr<Tensor> de_expected02;
std::vector<float> expected02 = {-1.34122,0.0442693,-0.48697,0.662939,-0.367669};
ASSERT_OK(Tensor::CreateFromVector(expected02, &de_expected02));
auto ms_expected02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected02));

// Transform params.
// Case 1: default BuildFromFile (all vectors loaded).
std::string vectors_dir = "data/dataset/testVectors/char_n_gram_20.txt";
std::shared_ptr<CharNGram> char_n_gram01;
Status s01 = CharNGram::BuildFromFile(&char_n_gram01, vectors_dir);
EXPECT_EQ(s01, Status::OK());
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(char_n_gram01);
auto transform01 = Execute({to_vectors01});
Status status01 = transform01(token, &lookup_result);
// NOTE(review): only shapes are compared here and below; the expected values
// themselves are never checked against lookup_result.
EXPECT_EQ(lookup_result.Shape(), ms_expected01.Shape());
EXPECT_TRUE(status01.IsOk());

// Case 2: max_vectors=100 exceeds the table size (20), so behaves like case 1.
std::shared_ptr<CharNGram> char_n_gram02;
Status s02 = CharNGram::BuildFromFile(&char_n_gram02, vectors_dir, 100);
EXPECT_EQ(s02, Status::OK());
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(char_n_gram02);
auto transform02 = Execute({to_vectors02});
Status status02 = transform02(token, &lookup_result);
EXPECT_EQ(lookup_result.Shape(), ms_expected01.Shape());
EXPECT_TRUE(status02.IsOk());

// Case 3: max_vectors=18 truncates the table, changing the lookup result.
std::shared_ptr<CharNGram> char_n_gram03;
Status s03 = CharNGram::BuildFromFile(&char_n_gram03, vectors_dir, 18);
EXPECT_EQ(s03, Status::OK());
std::shared_ptr<TensorTransform> to_vectors03 = std::make_shared<text::ToVectors>(char_n_gram03);
auto transform03 = Execute({to_vectors03});
Status status03 = transform03(token, &lookup_result);
EXPECT_EQ(lookup_result.Shape(), ms_expected02.Shape());
EXPECT_TRUE(status03.IsOk());
}

/// Feature: CharNGram
/// Description: test basic usage of ToVectors and the CharNGram with default parameter
/// Expectation: get correct MSTensor
TEST_F(MindDataTestExecute, TestToVectorsParamForCharNGram) {
MS_LOG(INFO) << "Doing MindDataTestExecute-TestToVectorsParamForCharNGram.";
// Three scalar tokens: an OOV word, an in-vocabulary word, and its
// capitalized variant (for the lower_case_backup case).
std::shared_ptr<Tensor> de_tensor01;
Tensor::CreateScalar<std::string>("none", &de_tensor01);
auto token01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor01));
std::shared_ptr<Tensor> de_tensor02;
Tensor::CreateScalar<std::string>("the", &de_tensor02);
auto token02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor02));
std::shared_ptr<Tensor> de_tensor03;
Tensor::CreateScalar<std::string>("The", &de_tensor03);
auto token03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor03));
mindspore::MSTensor lookup_result;

// Create expected output.
// expected01: default OOV fill (zeros); expected02: custom unk_init (-1);
// expected03: lookup of "the" against the full table.
std::shared_ptr<Tensor> de_expected01;
std::vector<float> expected01(5, 0);
ASSERT_OK(Tensor::CreateFromVector(expected01, &de_expected01));
auto ms_expected01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected01));
std::shared_ptr<Tensor> de_expected02;
std::vector<float> expected02(5, -1);
ASSERT_OK(Tensor::CreateFromVector(expected02, &de_expected02));
auto ms_expected02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected02));
std::shared_ptr<Tensor> de_expected03;
std::vector<float> expected03 = {-0.840079,-0.0270003,-0.833472,0.588367,-0.210012};
ASSERT_OK(Tensor::CreateFromVector(expected03, &de_expected03));
auto ms_expected03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected03));

// Transform params.
std::string vectors_dir = "data/dataset/testVectors/char_n_gram_20.txt";
std::shared_ptr<CharNGram> char_n_gram;
Status s = CharNGram::BuildFromFile(&char_n_gram, vectors_dir);
EXPECT_EQ(s, Status::OK());

// Case 1: OOV token with default ToVectors -> zero vector.
std::shared_ptr<TensorTransform> to_vectors01 = std::make_shared<text::ToVectors>(char_n_gram);
auto transform01 = Execute({to_vectors01});
Status status01 = transform01(token01, &lookup_result);
// NOTE(review): only shapes are compared here and below; the expected values
// themselves are never checked against lookup_result.
EXPECT_EQ(lookup_result.Shape(), ms_expected01.Shape());
EXPECT_TRUE(status01.IsOk());
// Case 2: OOV token with a custom unk_init -> -1 vector.
std::vector<float> unknown_init(5, -1);
std::shared_ptr<TensorTransform> to_vectors02 = std::make_shared<text::ToVectors>(char_n_gram, unknown_init);
auto transform02 = Execute({to_vectors02});
Status status02 = transform02(token01, &lookup_result);
EXPECT_EQ(lookup_result.Shape(), ms_expected02.Shape());
EXPECT_TRUE(status02.IsOk());
// Case 3: in-vocabulary token -> its table vector.
std::shared_ptr<TensorTransform> to_vectors03 = std::make_shared<text::ToVectors>(char_n_gram, unknown_init);
auto transform03 = Execute({to_vectors03});
Status status03 = transform03(token02, &lookup_result);
EXPECT_EQ(lookup_result.Shape(), ms_expected03.Shape());
EXPECT_TRUE(status03.IsOk());
// Case 4: capitalized token with lower_case_backup=true -> same as case 3.
std::shared_ptr<TensorTransform> to_vectors04 = std::make_shared<text::ToVectors>(char_n_gram, unknown_init, true);
auto transform04 = Execute({to_vectors04});
Status status04 = transform04(token03, &lookup_result);
EXPECT_EQ(lookup_result.Shape(), ms_expected03.Shape());
EXPECT_TRUE(status04.IsOk());
}

/// Feature: CharNGram
/// Description: test invalid parameter of ToVectors
/// Expectation: throw exception correctly
TEST_F(MindDataTestExecute, TestToVectorsWithInvalidParamForCharNGram) {
MS_LOG(INFO) << "Doing MindDataTestExecute-TestToVectorsWithInvalidParamForCharNGram.";
// One scalar token reused by both negative cases.
std::shared_ptr<Tensor> raw_token;
Tensor::CreateScalar<std::string>("none", &raw_token);
auto probe_token = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(raw_token));
mindspore::MSTensor lookup_result;

// Transform params.
std::string vectors_dir = "data/dataset/testVectors/char_n_gram_20.txt";
std::shared_ptr<CharNGram> valid_char_n_gram;
Status build_status = CharNGram::BuildFromFile(&valid_char_n_gram, vectors_dir);
EXPECT_EQ(build_status, Status::OK());

// Case 1: unk_init length (4) does not match the table's vector dimension (5).
std::vector<float> short_unknown_init(4, -1);
std::shared_ptr<TensorTransform> bad_init_op =
std::make_shared<text::ToVectors>(valid_char_n_gram, short_unknown_init);
auto bad_init_exec = Execute({bad_init_op});
Status bad_init_status = bad_init_exec(probe_token, &lookup_result);
EXPECT_FALSE(bad_init_status.IsOk());

// Case 2: a null CharNGram handle must be rejected.
std::shared_ptr<CharNGram> null_char_n_gram = nullptr;
std::shared_ptr<TensorTransform> null_gram_op = std::make_shared<text::ToVectors>(null_char_n_gram);
auto null_gram_exec = Execute({null_gram_op});
Status null_gram_status = null_gram_exec(probe_token, &lookup_result);
EXPECT_FALSE(null_gram_status.IsOk());
}

// Feature: DBToAmplitude
// Description: test DBToAmplitude in eager mode
// Expectation: the data is processed successfully


+ 20
- 0
tests/ut/data/dataset/testVectors/char_n_gram_20.txt View File

@@ -0,0 +1,20 @@
1gram-e -0.655379 0.574261 -0.714026 -0.148858 -0.0534275
1gram-a -0.288984 -0.225616 0.323913 -0.261039 -0.0628034
1gram-t 0.408448 0.175862 -0.296873 -0.209094 -0.53478
1gram-i 0.278486 -0.910641 -0.743681 -0.734405 0.519959
1gram-n -0.0712582 0.0898121 -1.12567 -0.815067 -0.435836
1gram-o -0.182786 0.535789 -0.391385 0.181972 0.317399
1gram-r 0.68474 0.103464 0.201631 -0.65319 0.554142
1gram-s -0.175988 -0.813322 0.465603 -0.0951031 0.193374
1gram-h -0.39348 -0.678079 0.233101 0.431805 2.04905
1gram-l -0.451299 -0.268223 -0.787034 -0.991984 0.251244
1gram-d 0.799629 -0.326191 -0.474959 0.235657 0.796227
2gram-e#END# -2.26956 0.288491 -0.740001 0.661703 0.147355
1gram-c -0.0413309 0.436135 -0.835305 -1.64429 -1.08329
2gram-s#END# 0.657201 2.11761 -1.59276 0.432072 1.21395
1gram-u -0.25203 -0.176365 -0.263038 -0.995372 -1.24916
2gram-#BEGIN#t -0.96853 -0.789463 0.515762 2.02107 -1.64635
1gram-m 0.422293 -0.149725 -0.734202 1.27342 0.232722
2gram-he -0.785562 0.63378 -1.23667 -0.693956 0.395988
2gram-th 0.663336 -0.240809 -1.87298 0.364651 0.26296
2gram-n#END# -0.149612 -0.664577 -1.12344 2.23695 0.610406

+ 20
- 0
tests/ut/data/dataset/testVectors/char_n_gram_20_dim_different.txt View File

@@ -0,0 +1,20 @@
1gram-e -0.655379 0.574261 -0.714026 -0.148858 -0.0534275
1gram-a -0.288984 -0.225616 0.323913 -0.261039 -0.0628034
1gram-t 0.408448 0.175862 -0.296873 -0.209094 -0.53478
1gram-i 0.278486 -0.910641 -0.743681 -0.734405 0.519959
1gram-n -0.0712582 0.0898121 -1.12567 -0.815067 -0.435836
1gram-o -0.182786 0.535789 -0.391385 0.181972 0.317399
1gram-r 0.68474 0.103464 0.201631 -0.65319 0.554142
1gram-s -0.175988 -0.813322 0.465603 -0.0951031 0.193374
1gram-h -0.39348 -0.678079 0.233101 0.431805 2.04905
1gram-l -0.451299 -0.268223 -0.787034 -0.991984 0.251244
1gram-d 0.799629 -0.326191 -0.474959 0.235657 0.796227
2gram-e#END# -2.26956 0.288491 -0.740001 0.661703 0.147355
1gram-c -0.0413309 0.436135 -0.835305 -1.64429 -1.08329
2gram-s#END# 0.657201 2.11761 -1.59276 0.432072 1.21395
1gram-u -0.25203 -0.176365 -0.263038 -0.995372 -1.24916
2gram-#BEGIN#t -0.96853 -0.789463 0.515762 2.02107
1gram-m 0.422293 -0.149725 -0.734202 1.27342 0.232722
2gram-he -0.785562 0.63378 -1.23667 -0.693956 0.395988
2gram-th 0.663336 -0.240809 -1.87298 0.364651 0.26296
2gram-n#END# -0.149612 -0.664577 -1.12344 2.23695 0.610406

+ 7
- 7
tests/ut/data/dataset/testVectors/words_with_big_letter.txt View File

@@ -1,7 +1,7 @@
ok
!
This
iS
my
HOME
.
ok
!
This
iS
my
HOME
.

+ 217
- 0
tests/ut/python/dataset/test_char_n_gram.py View File

@@ -0,0 +1,217 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import pytest
from mindspore import log
import mindspore.dataset as ds
import mindspore.dataset.text as text
import mindspore.dataset.text.transforms as T
DATASET_ROOT_PATH = "../data/dataset/testVectors/"
def _count_unequal_element(data_expected, data_me, rtol, atol):
assert data_expected.shape == data_me.shape
total_count = len(data_expected.flatten())
error = np.abs(data_expected - data_me)
greater = np.greater(error, atol + np.abs(data_expected)*rtol)
loss_count = np.count_nonzero(greater)
assert (loss_count/total_count) < rtol,\
"\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".\
format(data_expected[greater], data_me[greater], error[greater])
def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
    """Check two arrays for closeness; on a NaN-free mismatch, delegate to
    _count_unequal_element for a detailed per-element failure report."""
    contains_nan = np.any(np.isnan(data_expected))
    is_close = np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan)
    if contains_nan:
        # NaN-bearing data must match exactly under equal_nan semantics.
        assert is_close
    elif not is_close:
        _count_unequal_element(data_expected, data_me, rtol, atol)
def test_char_n_gram_all_to_vectors_params_eager():
    """
    Feature: CharNGram
    Description: test with all parameters which include `unk_init`
    and `lower_case_backup` in function ToVectors in eager mode
    Expectation: output is equal to the expected value
    """
    # max_vectors=18 truncates the 20-line table, which changes the lookup
    # results versus the full file.
    char_n_gram = text.CharNGram.from_file(DATASET_ROOT_PATH + "char_n_gram_20.txt", max_vectors=18)
    unk_init = (-np.ones(5)).tolist()
    to_vectors = T.ToVectors(char_n_gram, unk_init=unk_init, lower_case_backup=True)
    # "THE"/"To" resolve via lower_case_backup; "." is OOV and yields unk_init.
    result1 = to_vectors("THE")
    result2 = to_vectors(".")
    result3 = to_vectors("To")
    res = [[-1.34121733e+00, 4.42693333e-02, -4.86969667e-01, 6.62939000e-01, -3.67669000e-01],
           [-1.00000000e+00, -1.00000000e+00, -1.00000000e+00, -1.00000000e+00, -1.00000000e+00],
           [-9.68530000e-01, -7.89463000e-01, 5.15762000e-01, 2.02107000e+00, -1.64635000e+00]]
    res_array = np.array(res, dtype=np.float32)
    allclose_nparray(res_array[0], result1, 0.0001, 0.0001)
    allclose_nparray(res_array[1], result2, 0.0001, 0.0001)
    allclose_nparray(res_array[2], result3, 0.0001, 0.0001)
def test_char_n_gram_build_from_file():
    """
    Feature: CharNGram
    Description: test with only default parameter
    Expectation: output is equal to the expected value
    """
    char_n_gram = text.CharNGram.from_file(DATASET_ROOT_PATH + "char_n_gram_20.txt")
    to_vectors = text.ToVectors(char_n_gram)
    data = ds.TextFileDataset(DATASET_ROOT_PATH + "words.txt", shuffle=False)
    data = data.map(operations=to_vectors, input_columns=["text"])
    ind = 0
    # All-zero rows correspond to OOV tokens (default unknown fill).
    res = [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0.117336, 0.362446, -0.983326, 0.939264, -0.05648],
           [0.657201, 2.11761, -1.59276, 0.432072, 1.21395],
           [0., 0., 0., 0., 0.],
           [-2.26956, 0.288491, -0.740001, 0.661703, 0.147355],
           [0., 0., 0., 0., 0.]]
    for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        res_array = np.array(res[ind], dtype=np.float32)
        allclose_nparray(res_array, d["text"], 0.0001, 0.0001)
        ind += 1
def test_char_n_gram_all_build_from_file_params():
    """
    Feature: CharNGram
    Description: test with all parameters which include `path` and `max_vector` in function BuildFromFile
    Expectation: output is equal to the expected value
    """
    # max_vectors=100 exceeds the 20-line table, so all vectors are loaded and
    # the expected values match the default-parameter test.
    char_n_gram = text.CharNGram.from_file(DATASET_ROOT_PATH + "char_n_gram_20.txt", max_vectors=100)
    to_vectors = text.ToVectors(char_n_gram)
    data = ds.TextFileDataset(DATASET_ROOT_PATH + "words.txt", shuffle=False)
    data = data.map(operations=to_vectors, input_columns=["text"])
    ind = 0
    # All-zero rows correspond to OOV tokens (default unknown fill).
    res = [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0.117336, 0.362446, -0.983326, 0.939264, -0.05648],
           [0.657201, 2.11761, -1.59276, 0.432072, 1.21395],
           [0., 0., 0., 0., 0.],
           [-2.26956, 0.288491, -0.740001, 0.661703, 0.147355],
           [0., 0., 0., 0., 0.]]
    for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        res_array = np.array(res[ind], dtype=np.float32)
        allclose_nparray(res_array, d["text"], 0.0001, 0.0001)
        ind += 1
def test_char_n_gram_all_build_from_file_params_eager():
    """
    Feature: CharNGram
    Description: test with all parameters which include `path` and `max_vector` in function BuildFromFile in eager mode
    Expectation: output is equal to the expected value
    """
    # max_vectors=18 truncates the table; "the" therefore averages over fewer
    # matching n-gram rows than in the default-parameter eager test.
    char_n_gram = text.CharNGram.from_file(DATASET_ROOT_PATH + "char_n_gram_20.txt", max_vectors=18)
    to_vectors = T.ToVectors(char_n_gram)
    result1 = to_vectors("the")
    result2 = to_vectors(".")
    result3 = to_vectors("to")
    res = [[-1.34121733e+00, 4.42693333e-02, -4.86969667e-01, 6.62939000e-01, -3.67669000e-01],
           [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
           [-9.68530000e-01, -7.89463000e-01, 5.15762000e-01, 2.02107000e+00, -1.64635000e+00]]
    res_array = np.array(res, dtype=np.float32)
    allclose_nparray(res_array[0], result1, 0.0001, 0.0001)
    allclose_nparray(res_array[1], result2, 0.0001, 0.0001)
    allclose_nparray(res_array[2], result3, 0.0001, 0.0001)
def test_char_n_gram_build_from_file_eager():
    """
    Feature: CharNGram
    Description: test with only default parameter in eager mode
    Expectation: output is equal to the expected value
    """
    char_n_gram = text.CharNGram.from_file(DATASET_ROOT_PATH + "char_n_gram_20.txt")
    to_vectors = T.ToVectors(char_n_gram)
    # "." is OOV and maps to the default zero vector.
    result1 = to_vectors("the")
    result2 = to_vectors(".")
    result3 = to_vectors("to")
    res = [[-8.40079000e-01, -2.70002500e-02, -8.33472250e-01, 5.88367000e-01, -2.10011750e-01],
           [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
           [-9.68530000e-01, -7.89463000e-01, 5.15762000e-01, 2.02107000e+00, -1.64635000e+00]]
    res_array = np.array(res, dtype=np.float32)
    allclose_nparray(res_array[0], result1, 0.0001, 0.0001)
    allclose_nparray(res_array[1], result2, 0.0001, 0.0001)
    allclose_nparray(res_array[2], result3, 0.0001, 0.0001)
def test_char_n_gram_invalid_input():
    """
    Feature: CharNGram
    Description: test the validate function with invalid parameters.
    Expectation: Verification of correct error message for invalid input.
    """
    def test_invalid_input(test_name, file_path, error, error_msg, max_vectors=None,
                           unk_init=None, lower_case_backup=False, token="ok"):
        # Build the CharNGram/ToVectors chain and expect `error` to be raised
        # somewhere along it, carrying `error_msg`.
        log.info("Test CharNGram with wrong input: {0}".format(test_name))
        with pytest.raises(error) as error_info:
            char_n_gram = text.CharNGram.from_file(file_path, max_vectors=max_vectors)
            to_vectors = T.ToVectors(char_n_gram, unk_init=unk_init, lower_case_backup=lower_case_backup)
            to_vectors(token)
        assert error_msg in str(error_info.value)

    test_invalid_input("Not all vectors have the same number of dimensions",
                       DATASET_ROOT_PATH + "char_n_gram_20_dim_different.txt", error=RuntimeError,
                       error_msg="all vectors must have the same number of dimensions, " +
                       "but got dim 4 while expecting 5")
    test_invalid_input("the file is empty.", DATASET_ROOT_PATH + "vectors_empty.txt",
                       error=RuntimeError, error_msg="invalid file, file is empty.")
    test_invalid_input("the count of `unknown_init`'s element is different with word vector.",
                       DATASET_ROOT_PATH + "char_n_gram_20.txt",
                       error=RuntimeError, error_msg="unk_init must be the same length as vectors, " +
                       "but got unk_init: 6 and vectors: 5", unk_init=np.ones(6).tolist())
    test_invalid_input("The file not exist", DATASET_ROOT_PATH + "not_exist.txt", RuntimeError,
                       error_msg="get real path failed")
    test_invalid_input("max_vectors parameter must be greater than 0",
                       DATASET_ROOT_PATH + "char_n_gram_20.txt", error=ValueError,
                       error_msg="Input max_vectors is not within the required interval", max_vectors=-1)
    test_invalid_input("invalid max_vectors parameter type as a float",
                       DATASET_ROOT_PATH + "char_n_gram_20.txt", error=TypeError,
                       error_msg="Argument max_vectors with value 1.0 is not of type [<class 'int'>],"
                       " but got <class 'float'>.", max_vectors=1.0)
    test_invalid_input("invalid max_vectors parameter type as a string",
                       DATASET_ROOT_PATH + "char_n_gram_20.txt", error=TypeError,
                       error_msg="Argument max_vectors with value 1 is not of type [<class 'int'>],"
                       " but got <class 'str'>.", max_vectors="1")
    test_invalid_input("invalid token parameter type as a float",
                       DATASET_ROOT_PATH + "char_n_gram_20.txt", error=RuntimeError,
                       error_msg="input tensor type should be string.", token=1.0)
    # Fix: the original issued this identical case twice back-to-back; the
    # duplicate added no coverage and has been removed.
    test_invalid_input("invalid lower_case_backup parameter type as a string", DATASET_ROOT_PATH + "char_n_gram_20.txt",
                       error=TypeError, error_msg="Argument lower_case_backup with " +
                       "value True is not of type [<class 'bool'>],"
                       " but got <class 'str'>.", lower_case_backup="True")
if __name__ == '__main__':
    # Allow running this suite directly, without pytest collection.
    test_char_n_gram_all_to_vectors_params_eager()
    test_char_n_gram_build_from_file()
    test_char_n_gram_all_build_from_file_params()
    test_char_n_gram_all_build_from_file_params_eager()
    test_char_n_gram_build_from_file_eager()
    test_char_n_gram_invalid_input()

+ 0
- 2
tests/ut/python/dataset/test_fast_text.py View File

@@ -42,7 +42,6 @@ def test_fast_text_all_build_from_file_params():
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind
@@ -135,7 +134,6 @@ def test_fast_text_build_from_file():
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind


+ 0
- 2
tests/ut/python/dataset/test_glove.py View File

@@ -42,7 +42,6 @@ def test_glove_all_build_from_file_params():
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind
@@ -135,7 +134,6 @@ def test_glove_build_from_file():
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind


+ 0
- 1
tests/ut/python/dataset/test_vectors.py View File

@@ -101,7 +101,6 @@ def test_vectors_from_file_all_buildfromfile_params():
[0.68047, -0.039263, 0.30186, -0.17792, 0.42962, 0.032246],
[0.26818, 0.14346, -0.27877, 0.016257, 0.11384, 0.69923],
[0, 0, 0, 0, 0, 0]]
print(data)
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
res_array = np.array(res[ind], dtype=np.float32)
assert np.array_equal(res_array, d["text"]), ind


Loading…
Cancel
Save