You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_dataset_loader.py 1.7 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. import os
  2. import unittest
  3. from fastNLP.loader.dataset_loader import POSDataSetLoader, LMDataSetLoader, TokenizeDataSetLoader, \
  4. PeopleDailyCorpusLoader, ConllLoader
  5. class TestDatasetLoader(unittest.TestCase):
  6. def test_case_1(self):
  7. data = """Tom\tT\nand\tF\nJerry\tT\n.\tF\n\nHello\tT\nworld\tF\n!\tF"""
  8. lines = data.split("\n")
  9. answer = POSDataSetLoader.parse(lines)
  10. truth = [[["Tom", "and", "Jerry", "."], ["T", "F", "T", "F"]], [["Hello", "world", "!"], ["T", "F", "F"]]]
  11. self.assertListEqual(answer, truth, "POS Dataset Loader")
  12. def test_case_TokenizeDatasetLoader(self):
  13. loader = TokenizeDataSetLoader()
  14. data = loader.load("./test/data_for_tests/cws_pku_utf_8", max_seq_len=32)
  15. print("pass TokenizeDataSetLoader test!")
  16. def test_case_POSDatasetLoader(self):
  17. loader = POSDataSetLoader()
  18. data = loader.load("./test/data_for_tests/people.txt")
  19. datas = loader.load_lines("./test/data_for_tests/people.txt")
  20. print("pass POSDataSetLoader test!")
  21. def test_case_LMDatasetLoader(self):
  22. loader = LMDataSetLoader()
  23. data = loader.load("./test/data_for_tests/charlm.txt")
  24. datas = loader.load_lines("./test/data_for_tests/charlm.txt")
  25. print("pass TokenizeDataSetLoader test!")
  26. def test_PeopleDailyCorpusLoader(self):
  27. loader = PeopleDailyCorpusLoader()
  28. _, _ = loader.load("./test/data_for_tests/people_daily_raw.txt")
  29. def test_ConllLoader(self):
  30. loader = ConllLoader()
  31. _ = loader.load("./test/data_for_tests/conll_example.txt")
  32. if __name__ == '__main__':
  33. unittest.main()