You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_dataset_loader.py 1.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import unittest
  2. from fastNLP.loader.dataset_loader import POSDatasetLoader, LMDatasetLoader, TokenizeDatasetLoader, \
  3. PeopleDailyCorpusLoader, ConllLoader
  4. class TestDatasetLoader(unittest.TestCase):
  5. def test_case_1(self):
  6. data = """Tom\tT\nand\tF\nJerry\tT\n.\tF\n\nHello\tT\nworld\tF\n!\tF"""
  7. lines = data.split("\n")
  8. answer = POSDatasetLoader.parse(lines)
  9. truth = [[["Tom", "and", "Jerry", "."], ["T", "F", "T", "F"]], [["Hello", "world", "!"], ["T", "F", "F"]]]
  10. self.assertListEqual(answer, truth, "POS Dataset Loader")
  11. def test_case_TokenizeDatasetLoader(self):
  12. loader = TokenizeDatasetLoader("./test/data_for_tests/cws_pku_utf_8")
  13. data = loader.load_pku(max_seq_len=32)
  14. print("pass TokenizeDatasetLoader test!")
  15. def test_case_POSDatasetLoader(self):
  16. loader = POSDatasetLoader("./test/data_for_tests/people.txt")
  17. data = loader.load()
  18. datas = loader.load_lines()
  19. print("pass POSDatasetLoader test!")
  20. def test_case_LMDatasetLoader(self):
  21. loader = LMDatasetLoader("./test/data_for_tests/cws_pku_utf_8")
  22. data = loader.load()
  23. datas = loader.load_lines()
  24. print("pass TokenizeDatasetLoader test!")
  25. def test_PeopleDailyCorpusLoader(self):
  26. loader = PeopleDailyCorpusLoader("./test/data_for_tests/people_daily_raw.txt")
  27. _, _ = loader.load()
  28. def test_ConllLoader(self):
  29. loader = ConllLoader("./test/data_for_tests/conll_example.txt")
  30. _ = loader.load()
  31. if __name__ == '__main__':
  32. unittest.main()