| @@ -324,7 +324,7 @@ std::shared_ptr<Vocab> Dataset::BuildVocab(const std::vector<std::string> &colum | |||||
| // Finish building vocab by triggering GetNextRow | // Finish building vocab by triggering GetNextRow | ||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | ||||
| iter->GetNextRow(&row); | iter->GetNextRow(&row); | ||||
| if (vocab == nullptr) { | |||||
| if (vocab->vocab().empty()) { | |||||
| MS_LOG(ERROR) << "Fail to build vocab."; | MS_LOG(ERROR) << "Fail to build vocab."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -107,6 +107,9 @@ class Vocab { | |||||
| // @param std::string & word - word to be added will skip if word already exists | // @param std::string & word - word to be added will skip if word already exists | ||||
| void append_word(const std::string &word); | void append_word(const std::string &word); | ||||
| // return a read-only vocab | |||||
| const std::unordered_map<WordType, WordIdType> vocab() { return word2id_; } | |||||
| // destructor | // destructor | ||||
| ~Vocab() = default; | ~Vocab() = default; | ||||
| @@ -4456,8 +4456,8 @@ class VOCDataset(MappableDataset): | |||||
| task (str): Set the task type of reading voc data, now only support "Segmentation" or "Detection" | task (str): Set the task type of reading voc data, now only support "Segmentation" or "Detection" | ||||
| (default="Segmentation"). | (default="Segmentation"). | ||||
| mode (str): Set the data list txt file to be read (default="train"). | mode (str): Set the data list txt file to be read (default="train"). | ||||
| class_indexing (dict, optional): A str-to-int mapping from label name to index | |||||
| (default=None, the folder names will be sorted alphabetically and each | |||||
| class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in | |||||
| "Detection" task (default=None, the folder names will be sorted alphabetically and each | |||||
| class will be given a unique index starting from 0). | class will be given a unique index starting from 0). | ||||
| num_samples (int, optional): The number of images to be included in the dataset | num_samples (int, optional): The number of images to be included in the dataset | ||||
| (default=None, all images). | (default=None, all images). | ||||
| @@ -252,3 +252,17 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail2) { | |||||
| std::numeric_limits<int64_t>::max(), {"<pad>", "<unk>"}, true); | std::numeric_limits<int64_t>::max(), {"<pad>", "<unk>"}, true); | ||||
| EXPECT_EQ(vocab, nullptr); | EXPECT_EQ(vocab, nullptr); | ||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail3) { | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabFromDatasetFail3."; | |||||
| // Create a TextFile dataset | |||||
| std::string data_file = datasets_root_path_ + "/testVocab/words.txt"; | |||||
| std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create vocab from dataset | |||||
| // Expected failure: column name does not exist in ds | |||||
| std::shared_ptr<Vocab> vocab = ds->BuildVocab({"ColumnNotExist"}); | |||||
| EXPECT_EQ(vocab, nullptr); | |||||
| } | |||||