import unittest

import numpy as np
import torch

from fastNLP import FieldArray
from fastNLP.core.field import _get_ele_type_and_dim
from fastNLP import AutoPadder


class TestFieldArrayTyepDimDetect(unittest.TestCase):
    """Check that the element type and ndim of a cell are detected correctly."""

    def test_case1(self):
        # 1.1 plain Python scalars: the type is the scalar's type, the dim is 0
        for value in [1, True, 1.0, 'abc']:
            type_ = type(value)
            _type, _dim = _get_ele_type_and_dim(cell=value)
            self.assertListEqual([_type, _dim], [type_, 0])

        # 1.2 mixed int/float elements must be rejected.
        # The call under test is made directly inside the context manager;
        # wrapping it in a second ``self.assertRaises(...)`` call is not a
        # valid use of that API (the argument expression raises before the
        # wrapper ever runs).
        value = [1, 2, 1.0]
        with self.assertRaises(Exception):
            _get_ele_type_and_dim(value)

        # --- numpy inputs ---
        # 2.1 one-dimensional array
        value = np.array([1, 2, 3])
        type_ = value.dtype
        dim_ = 1
        self.assertSequenceEqual(_get_ele_type_and_dim(cell=value), [type_, dim_])

        # 2.2 ragged nested list (the char-embedding scenario)
        # NOTE(review): building an array from a ragged literal depends on the
        # installed NumPy version (newer releases refuse it) -- confirm the
        # NumPy versions this suite is expected to run against.
        value = np.array([[1, 2], [3, 4, 5]])
        self.assertSequenceEqual([int, 2], _get_ele_type_and_dim(value))

        # 2.3 regular two-dimensional array
        value = np.zeros((3, 4))
        self.assertSequenceEqual([value.dtype, 2], _get_ele_type_and_dim(value))

        # 2.4 inconsistent nesting depth must be rejected
        with self.assertRaises(Exception):
            value = np.array([[1, 2], [3, [1]]])
            _get_ele_type_and_dim(value)

        # 2.5 mixed element types must be rejected
        with self.assertRaises(Exception):
            value = np.array([[1, 2], [3.0]])
            _get_ele_type_and_dim(value)

        # --- torch tensor inputs ---
        # 3.1 the word-embedding scenario
        value = torch.zeros(3, 10)
        self.assertSequenceEqual([value.dtype, 2], _get_ele_type_and_dim(value))

        # 3.2 the char-embedding / image scenario
        value = torch.zeros(3, 32, 32)
        self.assertSequenceEqual([value.dtype, 3], _get_ele_type_and_dim(value))


class TestFieldArrayInit(unittest.TestCase):
    """
    1) If a DataSet is initialised from a dict, the FieldArray is built in add_field:
        1.1) 2-D list      DataSet({"x": [[1, 2], [3, 4]]})
        1.2) 2-D array     DataSet({"x": np.array([[1, 2], [3, 4]])})
        1.3) 3-D list      DataSet({"x": [[[1, 2], [3, 4]], [[1, 2], [3, 4]]]})
    2) If a DataSet is initialised from a list of Instance, append first builds the
       FieldArray from the first sample; later samples are added with FieldArray.append.
        2.1) 1-D list      DataSet([Instance(x=[1, 2, 3, 4])])
        2.2) 1-D array     DataSet([Instance(x=np.array([1, 2, 3, 4]))])
        2.3) 2-D list      DataSet([Instance(x=[[1, 2], [3, 4]])])
        2.4) 2-D array     DataSet([Instance(x=np.array([[1, 2], [3, 4]]))])
    """

    def test_init_v1(self):
        # 2-D list; the test passes as long as construction does not raise
        FieldArray("x", [[1, 2], [3, 4]] * 5, is_input=True)

    def test_init_v2(self):
        # 2-D array; the test passes as long as construction does not raise
        FieldArray("x", np.array([[1, 2], [3, 4]] * 5), is_input=True)

    def test_init_v3(self):
        # 3-D list; the test passes as long as construction does not raise
        FieldArray("x", [[[1, 2], [3, 4]], [[1, 2], [3, 4]]], is_input=True)

    def test_init_v4(self):
        # 1-D list
        val = [1, 2, 3, 4]
        fa = FieldArray("x", [val], is_input=True)
        fa.append(val)

    def test_init_v5(self):
        # 1-D array
        val = np.array([1, 2, 3, 4])
        fa = FieldArray("x", [val], is_input=True)
        fa.append(val)

    def test_init_v6(self):
        # 2-D list
        val = [[1, 2], [3, 4]]
        fa = FieldArray("x", [val], is_input=True)
        fa.append(val)

    def test_init_v7(self):
        # list of arrays
        fa = FieldArray("x", [np.array([[1, 2], [3, 4]]), np.array([[1, 2], [3, 4]])], is_input=True)
        self.assertEqual(fa.dtype, np.array([1]).dtype)

    def test_init_v8(self):
        # 2-D array
        val = np.array([[1, 2], [3, 4]])
        fa = FieldArray("x", [val], is_input=True)
        fa.append(val)


class TestFieldArray(unittest.TestCase):
    """Exercise FieldArray indexing, appending and dtype reporting."""

    def test_main(self):
        fa = FieldArray("x", [1, 2, 3, 4, 5], is_input=True)
        self.assertEqual(len(fa), 5)
        fa.append(6)
        self.assertEqual(len(fa), 6)

        self.assertEqual(fa[-1], 6)
        self.assertEqual(fa[0], 1)
        fa[-1] = 60
        self.assertEqual(fa[-1], 60)

        self.assertEqual(fa.get(0), 1)
        self.assertIsInstance(fa.get([0, 1, 2]), np.ndarray)
        self.assertListEqual(list(fa.get([0, 1, 2])), [1, 2, 3])

    def test_type_conversion(self):
        fa = FieldArray("x", [1, 2, 3, 4, 5], is_input=True)
        self.assertEqual(fa.dtype, int)

        fa = FieldArray("y", [1.1, 2.2, 3.3, 4.4, 5.5], is_input=True)
        fa.append(10.0)
        self.assertEqual(fa.dtype, float)

        fa = FieldArray("y", ["a", "b", "c", "d"], is_input=True)
        fa.append("e")
        self.assertEqual(fa.dtype, str)

    def test_support_np_array(self):
        fa = FieldArray("y", np.array([[1.1, 2.2, 3.3, 4.4, 5.5]]), is_input=True)
        self.assertEqual(fa.dtype, np.float64)

        fa.append(np.array([1.1, 2.2, 3.3, 4.4, 5.5]))
        self.assertEqual(fa.dtype, np.float64)

        fa = FieldArray("my_field", np.random.rand(3, 5), is_input=True)
        # in this case, pytype is actually a float. We do not care about it.
        self.assertEqual(fa.dtype, np.float64)

    def test_nested_list(self):
        fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.1, 2.2, 3.3, 4.4, 5.5]], is_input=True)
        self.assertEqual(fa.dtype, float)

    def test_getitem_v1(self):
        fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.0, 2.0, 3.0, 4.0, 5.0]], is_input=True)
        self.assertEqual(fa[0], [1.1, 2.2, 3.3, 4.4, 5.5])
        ans = fa[[0, 1]]
        self.assertIsInstance(ans, np.ndarray)
        self.assertIsInstance(ans[0], np.ndarray)
        self.assertEqual(ans[0].tolist(), [1.1, 2.2, 3.3, 4.4, 5.5])
        self.assertEqual(ans[1].tolist(), [1, 2, 3, 4, 5])
        self.assertEqual(ans.dtype, np.float64)

    def test_getitem_v2(self):
        x = np.random.rand(10, 5)
        fa = FieldArray("my_field", x, is_input=True)
        indices = [0, 1, 3, 4, 6]
        for a, b in zip(fa[indices], x[indices]):
            self.assertListEqual(a.tolist(), b.tolist())

    def test_append(self):
        # Each block below expects an exception from the construct-then-append
        # sequence; construction stays inside the ``with`` on purpose so the
        # expectation covers both steps, exactly as before.
        with self.assertRaises(Exception):
            fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True,
                            use_1st_ins_infer_dim_type=False)
            fa.append(0)

        with self.assertRaises(Exception):
            fa = FieldArray("y", [1.1, 2.2, 3.3, 4.4, 5.5], is_input=True,
                            use_1st_ins_infer_dim_type=False)
            fa.append([1, 2, 3, 4, 5])

        with self.assertRaises(Exception):
            fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True,
                            use_1st_ins_infer_dim_type=False)
            fa.append([])

        with self.assertRaises(Exception):
            fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True,
                            use_1st_ins_infer_dim_type=False)
            fa.append(["str", 0, 0, 0, 1.89])

        # A well-formed append must succeed and be retrievable.
        fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.0, 2.0, 3.0, 4.0, 5.0]], is_input=True,
                        use_1st_ins_infer_dim_type=False)
        fa.append([1.2, 2.3, 3.4, 4.5, 5.6])
        self.assertEqual(len(fa), 3)
        self.assertEqual(fa[2], [1.2, 2.3, 3.4, 4.5, 5.6])

    def test_ignore_type(self):
        # Exercise the ignore_type parameter, which is meant to skip type checking.
        fa = FieldArray("y", [[1.1, 2.2, "jin", {}, "hahah"], [int, 2, "$", 4, 5]], is_input=True, ignore_type=True)
        fa.append([1.2, 2.3, str, 4.5, print])

        # The test passes as long as construction does not raise.
        FieldArray("y", [(1, "1"), (2, "2"), (3, "3"), (4, "4")], is_target=True, ignore_type=True)


class TestAutoPadder(unittest.TestCase):
    """Exercise AutoPadder on Python lists, numpy arrays and torch tensors."""

    def test00(self):
        padder = AutoPadder()
        # no element type available
        contents = [(1, 2), ('str', 'a')]
        padder(contents, None, None, None)

    def test01(self):
        # bool/int/str/float contents with various numbers of dimensions
        # str
        padder = AutoPadder()
        content = ['This is a str', 'this is another str']
        self.assertListEqual(content, padder(content, None, str, 0).tolist())

        # 1-D int
        content = [[1, 2, 3], [4,], [5, 6, 7, 8]]
        padded_content = [[1, 2, 3, 0], [4, 0, 0, 0], [5, 6, 7, 8]]
        self.assertListEqual(padder(content, None, int, 1).tolist(), padded_content)

        # 2-D int
        padded_content = [[[1, 2, 3, 0], [4, 5, 0, 0], [7, 8, 9, 10]],
                          [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
        content = [
            [[1, 2, 3], [4, 5], [7, 8, 9, 10]],
            [[1]]
        ]
        self.assertListEqual(padder(content, None, int, 2).tolist(), padded_content)

        # 3-D images
        contents = [np.random.rand(3, 4, 4).tolist() for _ in range(5)]
        self.assertEqual(padder(contents, None, float, 3).shape, (5, 3, 4, 4))

        # higher dimensions: only the result type is checked
        contents = [np.random.rand(24, 3, 4, 4).tolist() for _ in range(5)]
        self.assertIsInstance(padder(contents, None, float, 4), np.ndarray)

    def test02(self):
        padder = AutoPadder()
        # numpy contents
        # 0-D
        contents = np.arange(12)
        self.assertListEqual(padder(contents, None, contents.dtype, 0).tolist(), contents.tolist())

        # 1-D
        contents = np.arange(12).reshape((3, 4))
        self.assertListEqual(padder(contents, None, contents.dtype, 1).tolist(), contents.tolist())

        # 2-D
        contents = np.ones((3, 10, 5))
        self.assertListEqual(padder(contents, None, contents.dtype, 2).tolist(), contents.tolist())

        # 3-D
        contents = [np.random.rand(3, 4, 4) for _ in range(5)]
        l_contents = [content.tolist() for content in contents]
        self.assertListEqual(padder(contents, None, contents[0].dtype, 3).tolist(), l_contents)

    def test03(self):
        padder = AutoPadder()
        # torch tensor contents
        # 0-D
        contents = torch.arange(12)
        r_contents = padder(contents, None, contents.dtype, 0)
        self.assertSequenceEqual(r_contents.tolist(), contents.tolist())
        self.assertEqual(r_contents.dtype, contents.dtype)

        # 0-D, given as a list of scalar tensors
        contents = [torch.tensor(1) for _ in range(10)]
        self.assertSequenceEqual(padder(contents, None, torch.int64, 0).tolist(), contents)

        # 1-D (only checks that the call does not raise)
        contents = torch.randn(3, 4)
        padder(contents, None, torch.float64, 1)

        # 3-D (only checks that the call does not raise)
        contents = [torch.randn(3, 4, 4) for _ in range(5)]
        padder(contents, None, torch.float64, 3)


class TestEngChar2DPadder(unittest.TestCase):
    def test01(self):
        """Check that EngChar2DPadder rejects non-2-D contents and pads 2-D contents."""
        from fastNLP import EngChar2DPadder
        padder = EngChar2DPadder(pad_length=0)

        contents = [1, 2]
        # 0-D contents are not allowed
        with self.assertRaises(Exception):
            padder(contents, None, np.int64, 0)

        contents = [[1, 2]]
        # 1-D contents are not allowed
        with self.assertRaises(Exception):
            padder(contents, None, np.int64, 1)

        contents = [
            [[[[1, 2]]]]
        ]
        # 3 or more dimensions are not allowed
        with self.assertRaises(Exception):
            padder(contents, None, np.int64, 3)

        contents = [
            [[1, 2, 3], [4, 5], [7, 8, 9, 10]],
            [[1]]
        ]
        self.assertListEqual([[[1, 2, 3, 0], [4, 5, 0, 0], [7, 8, 9, 10]], [[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
                             padder(contents, None, np.int64, 2).tolist())

        padder = EngChar2DPadder(pad_length=5, pad_val=-100)
        self.assertListEqual(
            [[[1, 2, 3, -100, -100], [4, 5, -100, -100, -100], [7, 8, 9, 10, -100]],
             [[1, -100, -100, -100, -100], [-100, -100, -100, -100, -100], [-100, -100, -100, -100, -100]]],
            padder(contents, None, np.int64, 2).tolist()
        )