You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_vocab.py 1.2 kB

12345678910111213141516171819202122232425262728293031
  1. import unittest
  2. from fastNLP.core.vocabulary import Vocabulary, DEFAULT_WORD_TO_INDEX
  3. class TestVocabulary(unittest.TestCase):
  4. def test_vocab(self):
  5. import _pickle as pickle
  6. import os
  7. vocab = Vocabulary()
  8. filename = 'vocab'
  9. vocab.update(filename)
  10. vocab.update([filename, ['a'], [['b']], ['c']])
  11. idx = vocab[filename]
  12. before_pic = (vocab.to_word(idx), vocab[filename])
  13. with open(filename, 'wb') as f:
  14. pickle.dump(vocab, f)
  15. with open(filename, 'rb') as f:
  16. vocab = pickle.load(f)
  17. os.remove(filename)
  18. vocab.build_reverse_vocab()
  19. after_pic = (vocab.to_word(idx), vocab[filename])
  20. TRUE_DICT = {'vocab': 5, 'a': 6, 'b': 7, 'c': 8}
  21. TRUE_DICT.update(DEFAULT_WORD_TO_INDEX)
  22. TRUE_IDXDICT = {0: '<pad>', 1: '<unk>', 2: '<reserved-2>', 3: '<reserved-3>', 4: '<reserved-4>', 5: 'vocab', 6: 'a', 7: 'b', 8: 'c'}
  23. self.assertEqual(before_pic, after_pic)
  24. self.assertDictEqual(TRUE_DICT, vocab.word2idx)
  25. self.assertDictEqual(TRUE_IDXDICT, vocab.idx2word)
  26. if __name__ == '__main__':
  27. unittest.main()