
test_transformer.py 10 kB

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
  15. """ test transformer"""
  16. import numpy as np
  17. import pytest
  18. from mindspore import Tensor
  19. from mindspore.common import dtype
  20. from mindspore.parallel.nn import MultiHeadAttention, FeedForward, TransformerEncoderLayer, TransformerEncoder, \
  21. TransformerDecoder, TransformerDecoderLayer, Transformer, CrossEntropyLoss, AttentionMask, FixedSparseAttention
  22. from mindspore.common.api import _cell_graph_executor


def test_transformer_encoder_only():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_size=64,
                        ffn_hidden_size=64)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)
    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_log_softmax():
    # 'logsoftmax' is not a supported hidden_act, so construction raises ValueError.
    with pytest.raises(ValueError):
        model = Transformer(batch_size=2,
                            src_seq_length=20,
                            tgt_seq_length=10,
                            encoder_layers=2,
                            decoder_layers=0,
                            hidden_act='logsoftmax',
                            hidden_size=64,
                            ffn_hidden_size=64)
        encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
        encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)
        _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_leakyrelu():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_act='leakyrelu',
                        hidden_size=64,
                        ffn_hidden_size=64)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)
    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_transformer_encoder_logsigmoid():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=2,
                        decoder_layers=0,
                        hidden_act='logsigmoid',
                        hidden_size=64,
                        ffn_hidden_size=64)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)
    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask)


def test_encoder_and_decoder():
    model = Transformer(batch_size=2,
                        src_seq_length=20,
                        tgt_seq_length=10,
                        encoder_layers=1,
                        decoder_layers=2,
                        hidden_size=64,
                        ffn_hidden_size=64)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)
    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)
    _cell_graph_executor.compile(model, encoder_input_value, encoder_input_mask,
                                 decoder_input_value,
                                 decoder_input_mask,
                                 memory_mask)


def test_transformer_encoder():
    model = TransformerEncoder(batch_size=2,
                               seq_length=16,
                               num_layers=2,
                               hidden_size=8,
                               ffn_hidden_size=64,
                               num_heads=2)
    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)
    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_encoder_layer():
    model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
                                    num_heads=2)
    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)
    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_encoder_layer_post_true():
    model = TransformerEncoderLayer(batch_size=2,
                                    seq_length=16,
                                    hidden_size=8, ffn_hidden_size=64,
                                    num_heads=2, post_layernorm_residual=True)
    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16)
    _cell_graph_executor.compile(model,
                                 encoder_input_value,
                                 encoder_input_mask)


def test_transformer_decoder():
    model = TransformerDecoder(num_layers=1,
                               batch_size=2,
                               src_seq_length=20,
                               tgt_seq_length=10,
                               hidden_size=64,
                               ffn_hidden_size=64,
                               num_heads=2)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)
    _cell_graph_executor.compile(model, decoder_input_value, decoder_input_mask,
                                 encoder_input_value,
                                 memory_mask)


def test_transformer_decoder_layer():
    model = TransformerDecoderLayer(
        batch_size=2,
        src_seq_length=20,
        tgt_seq_length=10,
        hidden_size=64,
        ffn_hidden_size=64,
        num_heads=2)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16)
    _cell_graph_executor.compile(model, decoder_input_value, decoder_input_mask,
                                 encoder_input_value,
                                 memory_mask)


def test_multihead_attention():
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    from_tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((2, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((2, 20, 20)), dtype.float16)
    _cell_graph_executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask)


def test_multihead_attention_wrong_batch():
    model = MultiHeadAttention(hidden_size=15,
                               src_seq_length=20,
                               tgt_seq_length=20,
                               batch_size=2,
                               num_heads=3)
    # Inputs carry batch dimension 3, conflicting with batch_size=2 above.
    from_tensor = Tensor(np.ones((3, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((3, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((3, 20, 20)), dtype.float16)
    with pytest.raises(ValueError):
        _cell_graph_executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask)


def test_feedforward_layer():
    model = FeedForward(hidden_size=15,
                        ffn_hidden_size=30,
                        dropout_rate=0.1,
                        hidden_act='relu')
    tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)
    _cell_graph_executor.compile(model, tensor)


def test_cross_entropy():
    model = CrossEntropyLoss()
    logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), dtype.float32)
    labels_np = np.array([1]).astype(np.int32)
    input_mask = Tensor(np.ones(1).astype(np.float32))
    labels = Tensor(labels_np)
    _cell_graph_executor.compile(model, logits, labels, input_mask)


def test_attention_mask():
    model = AttentionMask(seq_length=19)
    inputs = Tensor(np.ones((2, 19)), dtype.float32)
    _cell_graph_executor.compile(model, inputs)


def test_sparse_attention():
    model = FixedSparseAttention(batch_size=2,
                                 seq_length=1024,
                                 size_per_head=64,
                                 num_heads=8,
                                 block_size=64)
    q = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    k = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    v = Tensor(np.ones((2, 1024, 512)), dtype.float16)
    mask = Tensor(np.ones((2, 1024, 1024)), dtype.float32)
    _cell_graph_executor.compile(model, q, k, v, mask)
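

# Minimal runner sketch, an illustrative addition: it assumes pytest is
# installed alongside MindSpore. The upstream suite is normally invoked
# through the pytest CLI rather than a __main__ guard.
if __name__ == "__main__":
    pytest.main([__file__])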