You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_dataset_loader.py 2.0 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. import unittest
  2. from fastNLP.core.dataset import DataSet
  3. from fastNLP.io.dataset_loader import POSDataSetLoader, LMDataSetLoader, TokenizeDataSetLoader, \
  4. PeopleDailyCorpusLoader, ConllLoader
  5. class TestDatasetLoader(unittest.TestCase):
  6. def test_case_1(self):
  7. data = """Tom\tT\nand\tF\nJerry\tT\n.\tF\n\nHello\tT\nworld\tF\n!\tF"""
  8. lines = data.split("\n")
  9. answer = POSDataSetLoader.parse(lines)
  10. truth = [[["Tom", "and", "Jerry", "."], ["T", "F", "T", "F"]], [["Hello", "world", "!"], ["T", "F", "F"]]]
  11. self.assertListEqual(answer, truth, "POS Dataset Loader")
  12. def test_case_TokenizeDatasetLoader(self):
  13. loader = TokenizeDataSetLoader()
  14. filepath = "./test/data_for_tests/cws_pku_utf_8"
  15. data = loader.load(filepath, max_seq_len=32)
  16. assert len(data) > 0
  17. data1 = DataSet()
  18. data1.read_tokenize(filepath, max_seq_len=32)
  19. assert len(data1) > 0
  20. print("pass TokenizeDataSetLoader test!")
  21. def test_case_POSDatasetLoader(self):
  22. loader = POSDataSetLoader()
  23. filepath = "./test/data_for_tests/people.txt"
  24. data = loader.load("./test/data_for_tests/people.txt")
  25. datas = loader.load_lines("./test/data_for_tests/people.txt")
  26. data1 = DataSet().read_pos(filepath)
  27. assert len(data1) > 0
  28. print("pass POSDataSetLoader test!")
  29. def test_case_LMDatasetLoader(self):
  30. loader = LMDataSetLoader()
  31. data = loader.load("./test/data_for_tests/charlm.txt")
  32. datas = loader.load_lines("./test/data_for_tests/charlm.txt")
  33. print("pass TokenizeDataSetLoader test!")
  34. def test_PeopleDailyCorpusLoader(self):
  35. loader = PeopleDailyCorpusLoader()
  36. _, _ = loader.load("./test/data_for_tests/people_daily_raw.txt")
  37. def test_ConllLoader(self):
  38. loader = ConllLoader()
  39. _ = loader.load("./test/data_for_tests/conll_example.txt")
  40. if __name__ == '__main__':
  41. unittest.main()