Browse Source

!7902 [MD] Add default for sentencepiece col_names processing; vocab_size must be provided

Merge pull request !7902 from xulei/lite_test0905
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
066ff87ef4
2 changed files with 5 additions and 0 deletions
  1. mindspore/ccsrc/minddata/dataset/api/python/de_pipeline.cc (+2, -0)
  2. mindspore/dataset/text/validators.py (+3, -0)

+ 2
- 0
mindspore/ccsrc/minddata/dataset/api/python/de_pipeline.cc View File

@@ -1959,6 +1959,8 @@ Status DEPipeline::ParseBuildSentencePieceVocabOp(const py::dict &args, std::sha
if (!value.is_none()) {
if (key == "vocab_size") {
builder->SetVocabSize(ToInt(value));
} else if (key == "columns") {
(void)builder->SetColumnNames(ToStringVector(value));
} else if (key == "character_coverage") {
(void)builder->SetCharacterCoverage(ToFloat(value));
} else if (key == "params") {


+ 3
- 0
mindspore/dataset/text/validators.py View File

@@ -436,6 +436,9 @@ def check_from_dataset_sentencepiece(method):

if vocab_size is not None:
check_uint32(vocab_size, "vocab_size")
else:
raise TypeError("vocab_size must be provided")


if character_coverage is not None:
type_check(character_coverage, (float,), "character_coverage")


Loading…
Cancel
Save