You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_sampler.py 1.7 kB

6 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. import random
  2. import unittest
  3. import torch
  4. from fastNLP.core.dataset import DataSet
  5. from fastNLP.core.sampler import SequentialSampler, RandomSampler, \
  6. k_means_1d, k_means_bucketing, simple_sort_bucketing, BucketSampler
  7. class TestSampler(unittest.TestCase):
  8. def test_sequential_sampler(self):
  9. sampler = SequentialSampler()
  10. data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]
  11. for idx, i in enumerate(sampler(data)):
  12. assert idx == i
  13. def test_random_sampler(self):
  14. sampler = RandomSampler()
  15. data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]
  16. ans = [data[i] for i in sampler(data)]
  17. assert len(ans) == len(data)
  18. for d in ans:
  19. assert d in data
  20. def test_k_means(self):
  21. centroids, assign = k_means_1d([21, 3, 25, 7, 9, 22, 4, 6, 28, 10], 2, max_iter=5)
  22. centroids, assign = list(centroids), list(assign)
  23. assert len(centroids) == 2
  24. assert len(assign) == 10
  25. def test_k_means_bucketing(self):
  26. res = k_means_bucketing([21, 3, 25, 7, 9, 22, 4, 6, 28, 10], [None, None])
  27. assert len(res) == 2
  28. def test_simple_sort_bucketing(self):
  29. _ = simple_sort_bucketing([21, 3, 25, 7, 9, 22, 4, 6, 28, 10])
  30. assert len(_) == 10
  31. def test_BucketSampler(self):
  32. sampler = BucketSampler(num_buckets=3, batch_size=16, seq_lens_field_name="seq_len")
  33. data_set = DataSet({"x": [[0] * random.randint(1, 10)] * 10, "y": [[5, 6]] * 10})
  34. data_set.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")
  35. indices = sampler(data_set)
  36. self.assertEqual(len(indices), 10)
  37. # 跑通即可,不验证效果