You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_sampler.py 1.9 kB

7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. import random
  2. import unittest
  3. import torch
  4. from fastNLP.core.dataset import DataSet
  5. from fastNLP.core.sampler import convert_to_torch_tensor, SequentialSampler, RandomSampler, \
  6. k_means_1d, k_means_bucketing, simple_sort_bucketing, BucketSampler
  7. class TestSampler(unittest.TestCase):
  8. def test_convert_to_torch_tensor(self):
  9. data = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [1, 3, 4, 5, 2]]
  10. ans = convert_to_torch_tensor(data, False)
  11. assert isinstance(ans, torch.Tensor)
  12. assert tuple(ans.shape) == (3, 5)
  13. def test_sequential_sampler(self):
  14. sampler = SequentialSampler()
  15. data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]
  16. for idx, i in enumerate(sampler(data)):
  17. assert idx == i
  18. def test_random_sampler(self):
  19. sampler = RandomSampler()
  20. data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]
  21. ans = [data[i] for i in sampler(data)]
  22. assert len(ans) == len(data)
  23. for d in ans:
  24. assert d in data
  25. def test_k_means(self):
  26. centroids, assign = k_means_1d([21, 3, 25, 7, 9, 22, 4, 6, 28, 10], 2, max_iter=5)
  27. centroids, assign = list(centroids), list(assign)
  28. assert len(centroids) == 2
  29. assert len(assign) == 10
  30. def test_k_means_bucketing(self):
  31. res = k_means_bucketing([21, 3, 25, 7, 9, 22, 4, 6, 28, 10], [None, None])
  32. assert len(res) == 2
  33. def test_simple_sort_bucketing(self):
  34. _ = simple_sort_bucketing([21, 3, 25, 7, 9, 22, 4, 6, 28, 10])
  35. assert len(_) == 10
  36. def test_BucketSampler(self):
  37. sampler = BucketSampler(num_buckets=3, batch_size=16, seq_lens_field_name="seq_len")
  38. data_set = DataSet({"x": [[0] * random.randint(1, 10)] * 10, "y": [[5, 6]] * 10})
  39. data_set.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")
  40. indices = sampler(data_set)
  41. self.assertEqual(len(indices), 10)
  42. # 跑通即可,不验证效果