
test_transformer.py 5.7 kB

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
  15. """ test transformer"""
  16. import numpy as np
  17. from mindspore import Tensor
  18. from mindspore.common import dtype
  19. from mindspore.nn.parallel import MultiHeadAttention, FeedForward, TransformerEncoderLayer, TransformerEncoder, \
  20. TransformerDecoder, TransformerDecoderLayer, Transformer
  21. from mindspore.common.api import _executor
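

# The tests below are compile-only checks: each one builds a cell and runs
# graph compilation through the internal _executor.compile helper, without
# actually executing the network.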


def test_transformer_encoder_only():
    """Compile a Transformer configured with encoder layers only (no decoder)."""
    model = Transformer(encoder_layers=2,
                        decoder_layers=0,
                        hidden_size=64,
                        ffn_hidden_size=64,
                        src_seq_length=16,
                        tgt_seq_length=32)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16)
    _executor.compile(model, encoder_input_value, encoder_input_mask)


def test_encoder_and_decoder():
    """Compile a full Transformer with both encoder and decoder stacks."""
    model = Transformer(encoder_layers=1,
                        decoder_layers=2,
                        hidden_size=64,
                        ffn_hidden_size=64,
                        src_seq_length=20,
                        tgt_seq_length=20)
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16)  # (batch, 1, src_len, src_len)
    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16)  # (batch, 1, tgt_len, tgt_len)
    memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16)  # (batch, 1, tgt_len, src_len)
    _executor.compile(model, encoder_input_value, encoder_input_mask,
                      decoder_input_value,
                      decoder_input_mask,
                      memory_mask)


def test_transformer_encoder():
    """Compile a standalone TransformerEncoder stack."""
    model = TransformerEncoder(num_layers=2,
                               hidden_size=8,
                               ffn_hidden_size=64,
                               seq_length=16,
                               num_heads=2)
    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16)
    _executor.compile(model,
                      encoder_input_value,
                      encoder_input_mask)


def test_transformer_encoder_layer():
    """Compile a single TransformerEncoderLayer."""
    model = TransformerEncoderLayer(hidden_size=8, ffn_hidden_size=64, seq_length=16,
                                    num_heads=2)
    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16)
    _executor.compile(model,
                      encoder_input_value,
                      encoder_input_mask)


def test_transformer_encoder_layer_post_true():
    """Compile a TransformerEncoderLayer with post_layernorm_residual=True."""
    model = TransformerEncoderLayer(hidden_size=8, ffn_hidden_size=64, seq_length=16,
                                    num_heads=2, post_layernorm_residual=True)
    encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32)
    encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16)
    _executor.compile(model,
                      encoder_input_value,
                      encoder_input_mask)


def test_transformer_decoder():
    """Compile a standalone TransformerDecoder stack."""
    model = TransformerDecoder(num_layers=1,
                               hidden_size=64,
                               ffn_hidden_size=64,
                               num_heads=2,
                               seq_length=10)
    # encoder_input_value stands in for the encoder output (decoder memory).
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16)
    _executor.compile(model, decoder_input_value, decoder_input_mask,
                      encoder_input_value,
                      memory_mask)


def test_transformer_decoder_layer():
    """Compile a single TransformerDecoderLayer."""
    model = TransformerDecoderLayer(
        hidden_size=64,
        ffn_hidden_size=64,
        num_heads=2,
        seq_length=10)
    # encoder_input_value stands in for the encoder output (decoder memory).
    encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32)
    decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32)
    decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16)
    memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16)
    _executor.compile(model, decoder_input_value, decoder_input_mask,
                      encoder_input_value,
                      memory_mask)


def test_multihead_attention():
    """Compile a MultiHeadAttention cell with hidden size 15 split over 3 heads."""
    model = MultiHeadAttention(hidden_size=15,
                               num_heads=3)
    from_tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)
    to_tensor = Tensor(np.ones((2, 20, 15)), dtype.float16)
    attention_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16)
    _executor.compile(model, from_tensor, to_tensor, attention_mask)


def test_feedforward_layer():
    """Compile a FeedForward cell with ReLU activation and dropout."""
    model = FeedForward(hidden_size=15,
                        ffn_hidden_size=30,
                        dropout_rate=0.1,
                        hidden_act='relu')
    tensor = Tensor(np.ones((2, 20, 15)), dtype.float32)
    _executor.compile(model, tensor)
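

if __name__ == "__main__":
    # Optional direct invocation as a quick smoke check (an addition, not part
    # of the original file); these tests are normally collected and run by
    # pytest, e.g. `pytest test_transformer.py`.
    test_transformer_encoder_only()
    test_encoder_and_decoder()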