Browse Source

!3303 fix validator that checks the vocab_size is positive in vocab.from_file

Merge pull request !3303 from ZiruiWu/fix_vocab_validator
tags/v0.7.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
c24122bb55
2 changed files with 4 additions and 2 deletions
  1. +1
    -1
      mindspore/dataset/text/validators.py
  2. +3
    -1
      tests/ut/python/dataset/test_vocab.py

+ 1
- 1
mindspore/dataset/text/validators.py View File

@@ -67,7 +67,7 @@ def check_from_file(method):
check_unique_list_of_words(special_tokens, "special_tokens") check_unique_list_of_words(special_tokens, "special_tokens")
type_check_list([file_path, delimiter], (str,), ["file_path", "delimiter"]) type_check_list([file_path, delimiter], (str,), ["file_path", "delimiter"])
if vocab_size is not None: if vocab_size is not None:
check_value(vocab_size, (-1, INT32_MAX), "vocab_size")
check_positive(vocab_size, "vocab_size")
type_check(special_first, (bool,), special_first) type_check(special_first, (bool,), special_first)


return method(self, *args, **kwargs) return method(self, *args, **kwargs)


+ 3
- 1
tests/ut/python/dataset/test_vocab.py View File

@@ -133,7 +133,9 @@ def test_from_file():
assert test_config("w1 w2 w3 s1 s2 s3", 3, ["s1", "s2", "s3"], False) == [0, 1, 2, 3, 4, 5] assert test_config("w1 w2 w3 s1 s2 s3", 3, ["s1", "s2", "s3"], False) == [0, 1, 2, 3, 4, 5]
# test exception special_words contains duplicate words # test exception special_words contains duplicate words
assert "special_tokens contains duplicate" in test_config("w1", None, ["s1", "s1"], True) assert "special_tokens contains duplicate" in test_config("w1", None, ["s1", "s1"], True)

# test exception when vocab_size is not positive (zero or negative)
assert "Input vocab_size must be greater than 0" in test_config("w1 w2", 0, [], True)
assert "Input vocab_size must be greater than 0" in test_config("w1 w2", -1, [], True)


if __name__ == '__main__': if __name__ == '__main__':
test_from_dict_exception() test_from_dict_exception()


Loading…
Cancel
Save