Browse Source

Modify VOCDataset doc & Fix BuildVocab bug

tags/v1.0.0
luoyang 5 years ago
parent
commit
8aba39a71b
4 changed files with 20 additions and 3 deletions
  1. +1
    -1
      mindspore/ccsrc/minddata/dataset/api/datasets.cc
  2. +3
    -0
      mindspore/ccsrc/minddata/dataset/text/vocab.h
  3. +2
    -2
      mindspore/dataset/engine/datasets.py
  4. +14
    -0
      tests/ut/cpp/dataset/c_api_dataset_vocab.cc

+ 1
- 1
mindspore/ccsrc/minddata/dataset/api/datasets.cc View File

@@ -324,7 +324,7 @@ std::shared_ptr<Vocab> Dataset::BuildVocab(const std::vector<std::string> &colum
// Finish building vocab by triggering GetNextRow // Finish building vocab by triggering GetNextRow
std::unordered_map<std::string, std::shared_ptr<Tensor>> row; std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row); iter->GetNextRow(&row);
if (vocab == nullptr) {
if (vocab->vocab().empty()) {
MS_LOG(ERROR) << "Fail to build vocab."; MS_LOG(ERROR) << "Fail to build vocab.";
return nullptr; return nullptr;
} }


+ 3
- 0
mindspore/ccsrc/minddata/dataset/text/vocab.h View File

@@ -107,6 +107,9 @@ class Vocab {
// @param std::string & word - word to be added will skip if word already exists // @param std::string & word - word to be added will skip if word already exists
void append_word(const std::string &word); void append_word(const std::string &word);


// return a read-only vocab
const std::unordered_map<WordType, WordIdType> vocab() { return word2id_; }

// destructor // destructor
~Vocab() = default; ~Vocab() = default;




+ 2
- 2
mindspore/dataset/engine/datasets.py View File

@@ -4456,8 +4456,8 @@ class VOCDataset(MappableDataset):
task (str): Set the task type of reading voc data, now only support "Segmentation" or "Detection" task (str): Set the task type of reading voc data, now only support "Segmentation" or "Detection"
(default="Segmentation"). (default="Segmentation").
mode (str): Set the data list txt file to be read (default="train"). mode (str): Set the data list txt file to be read (default="train").
class_indexing (dict, optional): A str-to-int mapping from label name to index
(default=None, the folder names will be sorted alphabetically and each
class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in
"Detection" task (default=None, the folder names will be sorted alphabetically and each
class will be given a unique index starting from 0). class will be given a unique index starting from 0).
num_samples (int, optional): The number of images to be included in the dataset num_samples (int, optional): The number of images to be included in the dataset
(default=None, all images). (default=None, all images).


+ 14
- 0
tests/ut/cpp/dataset/c_api_dataset_vocab.cc View File

@@ -252,3 +252,17 @@ TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail2) {
std::numeric_limits<int64_t>::max(), {"<pad>", "<unk>"}, true); std::numeric_limits<int64_t>::max(), {"<pad>", "<unk>"}, true);
EXPECT_EQ(vocab, nullptr); EXPECT_EQ(vocab, nullptr);
} }

// Negative test: BuildVocab must return nullptr when asked to build a vocab
// from a column name that is not present in the dataset.
TEST_F(MindDataTestPipeline, TestVocabFromDatasetFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVocabFromDatasetFail3.";

  // Create a TextFile dataset
  std::string data_file = datasets_root_path_ + "/testVocab/words.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  // ASSERT (not EXPECT): ds is dereferenced below, so abort this test here if creation failed
  // rather than crashing the test binary on a null pointer.
  ASSERT_NE(ds, nullptr);

  // Create vocab from dataset
  // Expected failure: column name does not exist in ds
  std::shared_ptr<Vocab> vocab = ds->BuildVocab({"ColumnNotExist"});
  EXPECT_EQ(vocab, nullptr);
}

Loading…
Cancel
Save