From: @cathwong Reviewed-by: @nsyca,@robingrosman Signed-off-by: @robingrosman pull/15296/MERGE
| @@ -19,7 +19,8 @@ | |||
| #include <regex> | |||
| #include "minddata/dataset/include/text.h" | |||
| #include "mindspore/core/ir/dtype/type_id.h" | |||
| #include "minddata/dataset/core/type_id.h" | |||
| #include "minddata/dataset/text/ir/kernels/text_ir.h" | |||
| namespace mindspore { | |||
| @@ -203,16 +204,20 @@ Status JiebaTokenizer::ParserFile(const std::string &file_path, | |||
| // Lookup | |||
| struct Lookup::Data { | |||
| Data(const std::shared_ptr<Vocab> &vocab, const std::optional<std::vector<char>> &unknown_token, | |||
| const std::vector<char> &data_type) | |||
| : vocab_(vocab), unknown_token_(OptionalCharToString(unknown_token)), data_type_(CharToString(data_type)) {} | |||
| mindspore::DataType data_type) | |||
| : vocab_(vocab), | |||
| unknown_token_(OptionalCharToString(unknown_token)), | |||
| data_type_(dataset::MSTypeToDEType(static_cast<TypeId>(data_type))) {} | |||
| std::shared_ptr<Vocab> vocab_; | |||
| std::optional<std::string> unknown_token_; | |||
| std::string data_type_; | |||
| dataset::DataType data_type_; | |||
| }; | |||
| Lookup::Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::vector<char>> &unknown_token, | |||
| const std::vector<char> &data_type) | |||
| : data_(std::make_shared<Data>(vocab, unknown_token, data_type)) {} | |||
| mindspore::DataType data_type) | |||
| : data_(std::make_shared<Data>(vocab, unknown_token, data_type)) { | |||
| data_->data_type_ = dataset::MSTypeToDEType(static_cast<TypeId>(data_type)); | |||
| } | |||
| std::shared_ptr<TensorOperation> Lookup::Parse() { | |||
| return std::make_shared<LookupOperation>(data_->vocab_, data_->unknown_token_, data_->data_type_); | |||
| @@ -331,11 +336,12 @@ std::shared_ptr<TensorOperation> SlidingWindow::Parse() { | |||
| // ToNumber | |||
| struct ToNumber::Data { | |||
| explicit Data(const std::vector<char> &data_type) : data_type_(CharToString(data_type)) {} | |||
| std::string data_type_; | |||
| dataset::DataType data_type_; | |||
| }; | |||
| ToNumber::ToNumber(const std::vector<char> &data_type) : data_(std::make_shared<Data>(data_type)) {} | |||
| /* Builds an empty Data holder, then stores data_type converted through dataset::MSTypeToDEType. */ ToNumber::ToNumber(mindspore::DataType data_type) : data_(std::make_shared<Data>()) { | |||
| data_->data_type_ = dataset::MSTypeToDEType(static_cast<TypeId>(data_type)); | |||
| } | |||
| std::shared_ptr<TensorOperation> ToNumber::Parse() { return std::make_shared<ToNumberOperation>(data_->data_type_); } | |||
| @@ -207,13 +207,14 @@ class Lookup final : public TensorTransform { | |||
| /// \param[in] unknown_token word to use for lookup if the word being looked up is out of Vocabulary (oov). | |||
| /// If unknown_token is itself oov, a runtime error will be thrown. If unknown_token is {}, no unknown_token | |||
| /// is specified for words that are out of Vocabulary (default={}). | |||
| /// \param[in] data_type type of the tensor after lookup, typically int32. | |||
| /// \param[in] data_type mindspore::DataType of the tensor after lookup; must be numeric, including bool. | |||
| /// (default=mindspore::DataType::kNumberTypeInt32). | |||
| explicit Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token = {}, | |||
| const std::string &data_type = "int32") | |||
| : Lookup(vocab, OptionalStringToChar(unknown_token), StringToChar(data_type)) {} | |||
| mindspore::DataType data_type = mindspore::DataType::kNumberTypeInt32) | |||
| : Lookup(vocab, OptionalStringToChar(unknown_token), data_type) {} | |||
| explicit Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::vector<char>> &unknown_token, | |||
| const std::vector<char> &data_type); | |||
| mindspore::DataType data_type = mindspore::DataType::kNumberTypeInt32); | |||
| /// \brief Destructor | |||
| ~Lookup() = default; | |||
| @@ -405,10 +406,8 @@ class SlidingWindow final : public TensorTransform { | |||
| class ToNumber final : public TensorTransform { | |||
| public: | |||
| /// \brief Constructor. | |||
| /// \param[in] data_type of the tensor to be cast to. Must be a numeric type. | |||
| explicit ToNumber(const std::string &data_type) : ToNumber(StringToChar(data_type)) {} | |||
| explicit ToNumber(const std::vector<char> &data_type); | |||
| /// \param[in] data_type mindspore::DataType of the tensor to be cast to. Must be a numeric type, excluding bool. | |||
| explicit ToNumber(mindspore::DataType data_type); | |||
| /// \brief Destructor | |||
| ~ToNumber() = default; | |||
| @@ -42,6 +42,7 @@ | |||
| #include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h" | |||
| #endif | |||
| #include "minddata/dataset/core/data_type.h" | |||
| #include "minddata/dataset/core/type_id.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| #include "minddata/dataset/text/ir/validators.h" | |||
| @@ -166,10 +167,20 @@ Status JiebaTokenizerOperation::AddWord(const std::string &word, int64_t freq) { | |||
| } | |||
| // LookupOperation | |||
| // DataType data_type - required for C++ API | |||
| LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token, | |||
| const std::string &data_type) | |||
| DataType data_type) | |||
| : vocab_(vocab), unknown_token_(unknown_token), default_id_(Vocab::kNoTokenExists), data_type_(data_type) {} | |||
| // std::string data_type - required for Pybind. The type name is converted to a DataType in the body; default_id_ starts as Vocab::kNoTokenExists. | |||
| LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token, | |||
| const std::string &data_type) | |||
| : vocab_(vocab), unknown_token_(unknown_token), default_id_(Vocab::kNoTokenExists) { | |||
| // Convert from string to DEType: construct a DataType from the type-name string, then store it in data_type_ | |||
| DataType temp_data_type(data_type); | |||
| data_type_ = temp_data_type; | |||
| } | |||
| LookupOperation::~LookupOperation() = default; | |||
| Status LookupOperation::ValidateParams() { | |||
| @@ -187,8 +198,9 @@ Status LookupOperation::ValidateParams() { | |||
| } | |||
| } | |||
| if (!IsTypeNumeric(data_type_)) { | |||
| std::string err_msg = "Lookup does not support a string to string mapping, data_type can only be numeric."; | |||
| if (!data_type_.IsNumeric()) { | |||
| // Note: For DEType, Bool is counted as numeric, and is a valid type for Lookup | |||
| std::string err_msg = "Lookup : The parameter data_type must be numeric including bool."; | |||
| MS_LOG(ERROR) << err_msg; | |||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||
| } | |||
| @@ -351,11 +363,20 @@ std::shared_ptr<TensorOp> SlidingWindowOperation::Build() { | |||
| } | |||
| // ToNumberOperation | |||
| ToNumberOperation::ToNumberOperation(std::string data_type) : data_type_(data_type) {} | |||
| // DataType data_type - required for C++ API. The given DataType is stored unchanged in data_type_. | |||
| ToNumberOperation::ToNumberOperation(DataType data_type) : data_type_(data_type) {} | |||
| // std::string data_type - required for Pybind. The type name is converted to a DataType in the body. | |||
| ToNumberOperation::ToNumberOperation(std::string data_type) { | |||
| // Convert from string to DEType: construct a DataType from the type-name string, then store it in data_type_ | |||
| DataType temp_data_type(data_type); | |||
| data_type_ = temp_data_type; | |||
| } | |||
| Status ToNumberOperation::ValidateParams() { | |||
| if (!IsTypeNumeric(data_type_) || IsTypeBoolean(data_type_)) { | |||
| std::string err_msg = "ToNumber : The parameter data_type must be a numeric type, got: " + data_type_; | |||
| if (!data_type_.IsNumeric() || data_type_.IsBool()) { | |||
| // Note: For DEType, Bool is counted as numeric, but is not a valid type for ToNumber. | |||
| std::string err_msg = "ToNumber : The parameter data_type must be numeric and excludes bool."; | |||
| MS_LOG(ERROR) << err_msg; | |||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||
| } | |||
| @@ -368,6 +389,13 @@ std::shared_ptr<TensorOp> ToNumberOperation::Build() { | |||
| return tensor_op; | |||
| } | |||
| /* Writes a JSON object whose "data_type" entry is data_type_.ToString() into the caller's out_json, then returns Status::OK(). */ Status ToNumberOperation::to_json(nlohmann::json *out_json) { | |||
| nlohmann::json args; | |||
| args["data_type"] = data_type_.ToString(); | |||
| *out_json = args; | |||
| return Status::OK(); | |||
| } | |||
| // TruncateSequencePairOperation | |||
| TruncateSequencePairOperation::TruncateSequencePairOperation(int32_t max_length) : max_length_(max_length) {} | |||
| @@ -142,7 +142,9 @@ class JiebaTokenizerOperation : public TensorOperation { | |||
| class LookupOperation : public TensorOperation { | |||
| public: | |||
| explicit LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token, | |||
| const std::string &data_type); | |||
| DataType data_type); // Used for C++ API | |||
| explicit LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token, | |||
| const std::string &data_type); // Used for Pybind | |||
| ~LookupOperation(); | |||
| @@ -156,7 +158,7 @@ class LookupOperation : public TensorOperation { | |||
| std::shared_ptr<Vocab> vocab_; | |||
| std::optional<std::string> unknown_token_; | |||
| int32_t default_id_; | |||
| std::string data_type_; | |||
| DataType data_type_; | |||
| }; | |||
| class NgramOperation : public TensorOperation { | |||
| @@ -273,7 +275,8 @@ class SlidingWindowOperation : public TensorOperation { | |||
| class ToNumberOperation : public TensorOperation { | |||
| public: | |||
| explicit ToNumberOperation(std::string data_type); | |||
| explicit ToNumberOperation(DataType data_type); // Used for C++ API | |||
| explicit ToNumberOperation(std::string data_type); // Used for Pybind | |||
| ~ToNumberOperation() = default; | |||
| @@ -283,8 +286,10 @@ class ToNumberOperation : public TensorOperation { | |||
| std::string Name() const override { return kToNumberOperation; } | |||
| Status to_json(nlohmann::json *out_json) override; | |||
| private: | |||
| std::string data_type_; | |||
| DataType data_type_; | |||
| }; | |||
| class TruncateSequencePairOperation : public TensorOperation { | |||
| @@ -89,6 +89,9 @@ TEST_F(MindDataTestPipeline, TestSentencePieceVocabSuccess1) { | |||
| } | |||
| EXPECT_EQ(i, 1); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestSentencePieceVocabSuccess2) { | |||
| @@ -149,6 +152,9 @@ TEST_F(MindDataTestPipeline, TestSentencePieceVocabSuccess2) { | |||
| } | |||
| EXPECT_EQ(i, 1); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestSentencePieceVocabFail) { | |||
| @@ -1541,7 +1541,7 @@ TEST_F(MindDataTestPipeline, TestToNumberSuccess1) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("int64"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeInt64); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -1596,7 +1596,7 @@ TEST_F(MindDataTestPipeline, TestToNumberSuccess2) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("float64"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeFloat64); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -1651,7 +1651,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail1) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("int8"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeInt8); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -1701,7 +1701,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail2) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("float16"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeFloat16); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -1747,7 +1747,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail3) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("int64"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeInt64); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -1789,7 +1789,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail4) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("string"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kObjectTypeString); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -1812,7 +1812,7 @@ TEST_F(MindDataTestPipeline, TestToNumberFail5) { | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create ToNumber operation on ds | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>("bool"); | |||
| std::shared_ptr<TensorTransform> to_number = std::make_shared<text::ToNumber>(mindspore::DataType::kNumberTypeBool); | |||
| EXPECT_NE(to_number, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -36,10 +36,10 @@ class MindDataTestPipeline : public UT::DatasetOpTesting { | |||
| }; | |||
| // Macro to compare 2 MSTensors as not equal; compare datasize only | |||
| #define EXPECT_MSTENSOR_DATA_NE(_mstensor1, _mstensor2) \ | |||
| do { \ | |||
| EXPECT_NE(_mstensor1.DataSize(), _mstensor2.DataSize()); \ | |||
| } while (false) | |||
| #define EXPECT_MSTENSOR_DATA_NE(_mstensor1, _mstensor2) \ | |||
| do { \ | |||
| EXPECT_NE(_mstensor1.DataSize(), _mstensor2.DataSize()); \ | |||
| } while (false) | |||
| TEST_F(MindDataTestPipeline, TestVocabLookupOp) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabLookupOp."; | |||
| @@ -56,7 +56,8 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOp) { | |||
| EXPECT_EQ(s, Status::OK()); | |||
| // Create Lookup operation on ds | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "<unk>", "int32"); | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "<unk>", mindspore::DataType::kNumberTypeInt32); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create Map operation on ds | |||
| @@ -87,6 +88,11 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOp) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 6); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabLookupOpEmptyString) { | |||
| @@ -104,7 +110,8 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpEmptyString) { | |||
| EXPECT_EQ(s, Status::OK()); | |||
| // Create Lookup operation on ds | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "", "int32"); | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "", mindspore::DataType::kNumberTypeInt32); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create Map operation on ds | |||
| @@ -135,6 +142,60 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpEmptyString) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 6); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabLookupBool) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabLookupBool."; | |||
| // Invoke Lookup with Bool data_type; the pipeline is expected to build, run, and yield 6 rows | |||
| // Create a TextFile dataset (no shuffling) | |||
| std::string data_file = datasets_root_path_ + "/testVocab/words.txt"; | |||
| std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a vocab from vector, with the special tokens "<pad>" and "<unk>" | |||
| std::vector<std::string> list = {"home", "IS", "behind", "the", "world", "ahead", "!"}; | |||
| std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); | |||
| Status s = Vocab::BuildFromVector(list, {"<pad>", "<unk>"}, true, &vocab); | |||
| EXPECT_EQ(s, Status::OK()); | |||
| // Create Lookup operation on ds, requesting kNumberTypeBool as the output data_type | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "<unk>", mindspore::DataType::kNumberTypeBool); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create Map operation on ds, applying lookup to the "text" column | |||
| ds = ds->Map({lookup}, {"text"}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row; an empty row ends the loop | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| auto ind = row["text"]; | |||
| MS_LOG(INFO) << ind.Shape(); | |||
| TEST_MS_LOG_MSTENSOR(INFO, "ind: ", ind); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 6); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabLookupOpFail1) { | |||
| @@ -151,7 +212,8 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpFail1) { | |||
| EXPECT_EQ(s, Status::OK()); | |||
| // Create lookup op for ds | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "<unk>", "int32"); | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "<unk>", mindspore::DataType::kNumberTypeInt32); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -174,7 +236,8 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpFail2) { | |||
| std::shared_ptr<Vocab> vocab; | |||
| // Create lookup op | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "", "int32"); | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "", mindspore::DataType::kNumberTypeInt32); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create a Map operation on ds | |||
| @@ -186,6 +249,33 @@ TEST_F(MindDataTestPipeline, TestVocabLookupOpFail2) { | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabLookupOpFail3DataType) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabLookupOpFail3DataType."; | |||
| // Create a TextFile Dataset (no shuffling) | |||
| std::string data_file = datasets_root_path_ + "/testVocab/words.txt"; | |||
| std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Build vocab from vector, with the special tokens "<pad>" and "<unk>" | |||
| std::vector<std::string> list = {"home", "IS", "behind", "the", "world", "ahead", "!"}; | |||
| std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); | |||
| Status s = Vocab::BuildFromVector(list, {"<pad>", "<unk>"}, true, &vocab); | |||
| EXPECT_EQ(s, Status::OK()); | |||
| // Create lookup op for ds, requesting kObjectTypeString as the output data_type | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "", mindspore::DataType::kObjectTypeString); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create a Map operation on ds; the op and the mapped dataset objects are still created | |||
| ds = ds->Map({lookup}); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| // Expect failure: invalid Lookup input (String is not valid for data_type), so no iterator is returned | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabFromDataset) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabFromDataset."; | |||
| @@ -204,7 +294,8 @@ TEST_F(MindDataTestPipeline, TestVocabFromDataset) { | |||
| EXPECT_EQ(home_index, 4); | |||
| // Create Lookup operation on ds | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "<unk>", "int32"); | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "<unk>", mindspore::DataType::kNumberTypeInt32); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create Map operation on ds | |||
| @@ -235,6 +326,11 @@ TEST_F(MindDataTestPipeline, TestVocabFromDataset) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 6); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabFromDatasetDefault) { | |||
| @@ -254,6 +350,7 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetDefault) { | |||
| EXPECT_EQ(home_index, 2); | |||
| // Create Lookup operation on ds | |||
| // Use default data_type parameter | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "home"); | |||
| EXPECT_NE(lookup, nullptr); | |||
| @@ -293,6 +390,11 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetDefault) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 6); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail1) { | |||
| @@ -371,7 +473,8 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetInt64) { | |||
| EXPECT_EQ(home_index, 2); | |||
| // Create Lookup operation on ds | |||
| std::shared_ptr<TensorTransform> lookup = std::make_shared<text::Lookup>(vocab, "home", "int64"); | |||
| std::shared_ptr<TensorTransform> lookup = | |||
| std::make_shared<text::Lookup>(vocab, "home", mindspore::DataType::kNumberTypeInt64); | |||
| EXPECT_NE(lookup, nullptr); | |||
| // Create Map operation on ds | |||
| @@ -410,4 +513,9 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetInt64) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 6); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| @@ -202,7 +202,7 @@ def test_lookup_cast_type(): | |||
| assert test_config("unk") == np.dtype("int32") | |||
| # test exception, data_type isn't the correct type | |||
| assert "tldr is not of type [<class 'mindspore._c_expression.typing.Type'>]" in test_config("unk", "tldr") | |||
| assert "Lookup does not support a string to string mapping, data_type can only be numeric." in \ | |||
| assert "Lookup : The parameter data_type must be numeric including bool." in \ | |||
| test_config("w1", mstype.string) | |||