You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_preprocess.py 2.1 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. import os
  2. import unittest
  3. from fastNLP.core.preprocess import SeqLabelPreprocess
  4. class TestSeqLabelPreprocess(unittest.TestCase):
  5. def test_case_1(self):
  6. data = [
  7. [['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
  8. [['Hello', 'world', '!'], ['a', 'n', '.']],
  9. [['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
  10. [['Hello', 'world', '!'], ['a', 'n', '.']],
  11. [['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
  12. [['Hello', 'world', '!'], ['a', 'n', '.']],
  13. [['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
  14. [['Hello', 'world', '!'], ['a', 'n', '.']],
  15. [['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
  16. [['Hello', 'world', '!'], ['a', 'n', '.']],
  17. ]
  18. if os.path.exists("./save"):
  19. for root, dirs, files in os.walk("./save", topdown=False):
  20. for name in files:
  21. os.remove(os.path.join(root, name))
  22. for name in dirs:
  23. os.rmdir(os.path.join(root, name))
  24. result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4,
  25. pickle_path="./save")
  26. result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4,
  27. pickle_path="./save")
  28. if os.path.exists("./save"):
  29. for root, dirs, files in os.walk("./save", topdown=False):
  30. for name in files:
  31. os.remove(os.path.join(root, name))
  32. for name in dirs:
  33. os.rmdir(os.path.join(root, name))
  34. result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data,
  35. pickle_path="./save", train_dev_split=0.4,
  36. cross_val=True)
  37. result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data,
  38. pickle_path="./save", train_dev_split=0.4,
  39. cross_val=True)

一款轻量级的自然语言处理(NLP)工具包,目标是减少用户项目中的工程型代码,例如数据处理循环、训练循环、多卡运行等