You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_extcnndm.py 2.0 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # __author__="Danqing Wang"
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # ==============================================================================
  17. import unittest
  18. import os
  19. # import sys
  20. #
  21. # sys.path.append("../../../")
  22. from fastNLP.io import DataBundle
  23. from fastNLP.io.pipe.summarization import ExtCNNDMPipe
  24. class TestRunExtCNNDMPipe(unittest.TestCase):
  25. def test_load(self):
  26. data_set_dict = {
  27. 'CNNDM': {"train": 'test/data_for_tests/cnndm.jsonl'},
  28. }
  29. vocab_size = 100000
  30. VOCAL_FILE = 'test/data_for_tests/cnndm.vocab'
  31. sent_max_len = 100
  32. doc_max_timesteps = 50
  33. dbPipe = ExtCNNDMPipe(vocab_size=vocab_size,
  34. vocab_path=VOCAL_FILE,
  35. sent_max_len=sent_max_len,
  36. doc_max_timesteps=doc_max_timesteps)
  37. dbPipe2 = ExtCNNDMPipe(vocab_size=vocab_size,
  38. vocab_path=VOCAL_FILE,
  39. sent_max_len=sent_max_len,
  40. doc_max_timesteps=doc_max_timesteps,
  41. domain=True)
  42. for k, v in data_set_dict.items():
  43. db = dbPipe.process_from_file(v)
  44. db2 = dbPipe2.process_from_file(v)
  45. # print(db2.get_dataset("train"))
  46. self.assertTrue(isinstance(db, DataBundle))
  47. self.assertTrue(isinstance(db2, DataBundle))