You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_text_classification.py 5.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import shutil
  3. import unittest
  4. import zipfile
  5. from pathlib import Path
  6. from modelscope.fileio import File
  7. from modelscope.models import Model
  8. from modelscope.models.nlp import BertForSequenceClassification
  9. from modelscope.pipelines import SequenceClassificationPipeline, pipeline
  10. from modelscope.preprocessors import SequenceClassificationPreprocessor
  11. from modelscope.pydatasets import PyDataset
  12. from modelscope.utils.constant import Hubs, Tasks
  13. from modelscope.utils.hub import get_model_cache_dir
  14. from modelscope.utils.test_utils import test_level
  15. class SequenceClassificationTest(unittest.TestCase):
  16. def setUp(self) -> None:
  17. self.model_id = 'damo/bert-base-sst2'
  18. # switch to False if downloading everytime is not desired
  19. purge_cache = True
  20. if purge_cache:
  21. shutil.rmtree(
  22. get_model_cache_dir(self.model_id), ignore_errors=True)
  23. def predict(self, pipeline_ins: SequenceClassificationPipeline):
  24. from easynlp.appzoo import load_dataset
  25. set = load_dataset('glue', 'sst2')
  26. data = set['test']['sentence'][:3]
  27. results = pipeline_ins(data[0])
  28. print(results)
  29. results = pipeline_ins(data[1])
  30. print(results)
  31. print(data)
  32. def printDataset(self, dataset: PyDataset):
  33. for i, r in enumerate(dataset):
  34. if i > 10:
  35. break
  36. print(r)
  37. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  38. def test_run(self):
  39. model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
  40. '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
  41. cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
  42. cache_path = Path(cache_path_str)
  43. if not cache_path.exists():
  44. cache_path.parent.mkdir(parents=True, exist_ok=True)
  45. cache_path.touch(exist_ok=True)
  46. with cache_path.open('wb') as ofile:
  47. ofile.write(File.read(model_url))
  48. with zipfile.ZipFile(cache_path_str, 'r') as zipf:
  49. zipf.extractall(cache_path.parent)
  50. path = r'.cache/easynlp/'
  51. model = BertForSequenceClassification(path)
  52. preprocessor = SequenceClassificationPreprocessor(
  53. path, first_sequence='sentence', second_sequence=None)
  54. pipeline1 = SequenceClassificationPipeline(model, preprocessor)
  55. self.predict(pipeline1)
  56. pipeline2 = pipeline(
  57. Tasks.text_classification, model=model, preprocessor=preprocessor)
  58. print(pipeline2('Hello world!'))
  59. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  60. def test_run_with_model_from_modelhub(self):
  61. model = Model.from_pretrained(self.model_id)
  62. preprocessor = SequenceClassificationPreprocessor(
  63. model.model_dir, first_sequence='sentence', second_sequence=None)
  64. pipeline_ins = pipeline(
  65. task=Tasks.text_classification,
  66. model=model,
  67. preprocessor=preprocessor)
  68. self.predict(pipeline_ins)
  69. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  70. def test_run_with_model_name(self):
  71. text_classification = pipeline(
  72. task=Tasks.text_classification, model=self.model_id)
  73. result = text_classification(
  74. PyDataset.load(
  75. 'glue',
  76. subset_name='sst2',
  77. split='train',
  78. target='sentence',
  79. hub=Hubs.huggingface))
  80. self.printDataset(result)
  81. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  82. def test_run_with_default_model(self):
  83. text_classification = pipeline(task=Tasks.text_classification)
  84. result = text_classification(
  85. PyDataset.load(
  86. 'glue',
  87. subset_name='sst2',
  88. split='train',
  89. target='sentence',
  90. hub=Hubs.huggingface))
  91. self.printDataset(result)
  92. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  93. def test_run_with_dataset(self):
  94. model = Model.from_pretrained(self.model_id)
  95. preprocessor = SequenceClassificationPreprocessor(
  96. model.model_dir, first_sequence='sentence', second_sequence=None)
  97. text_classification = pipeline(
  98. Tasks.text_classification, model=model, preprocessor=preprocessor)
  99. # loaded from huggingface dataset
  100. dataset = PyDataset.load(
  101. 'glue',
  102. subset_name='sst2',
  103. split='train',
  104. target='sentence',
  105. hub=Hubs.huggingface)
  106. result = text_classification(dataset)
  107. self.printDataset(result)
  108. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  109. def test_run_with_modelscope_dataset(self):
  110. text_classification = pipeline(task=Tasks.text_classification)
  111. # loaded from modelscope dataset
  112. dataset = PyDataset.load(
  113. 'squad', split='train', target='context', hub=Hubs.modelscope)
  114. result = text_classification(dataset)
  115. self.printDataset(result)
  116. if __name__ == '__main__':
  117. unittest.main()