
test_lstm_op.py
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore.common.api import ms_function
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple, Parameter
from mindspore.common.tensor import Tensor
from mindspore.ops import composite as C
from mindspore.ops import operations as P

context.set_context(device_target='CPU')
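
# Everything below targets the CPU backend; the ms_function-decorated
# construct() methods are compiled into a graph before execution.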

class LstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                             [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                             [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
                             [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
                             [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]]).astype(np.float32)
        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.array([0.1, 0.1, 0.1, 0.1]).reshape(
                (num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')
        self.c = Parameter(initializer(
            Tensor(np.array([0.2, 0.2, 0.2, 0.2]).reshape(
                (num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        # Fixed input-hidden weights: 4 * hidden_size rows (one block per gate),
        # input_size columns.
        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02],
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32)
        # Fixed hidden-hidden weights: 4 * hidden_size rows, hidden_size columns.
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32)
        wih = wih.transpose((1, 0))
        whh = whh.transpose((1, 0))
        bih = np.zeros((1, 8)).astype(np.float32)
        # Pack the transposed weights plus a zero bias row into the single flat
        # weight tensor that the P.LSTM primitive consumes.
        w_np = np.concatenate((wih, whh, bih), axis=0).reshape([-1, 1, 1])
        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)
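
# The flat weight built in LstmNet packs, per layer and direction, the
# input-hidden matrix (4*hidden_size x input_size), the hidden-hidden matrix
# (4*hidden_size x hidden_size), and a single bias vector of length
# 4*hidden_size when has_bias is set, so its total element count is
# 4*hidden_size * (input_size + hidden_size + 1). A small sanity-check sketch
# of that count as used in this file (the helper name is illustrative, not a
# MindSpore API):
def _packed_weight_numel(input_size, hidden_size, has_bias=True):
    gate_size = 4 * hidden_size
    return gate_size * (input_size + hidden_size + (1 if has_bias else 0))

# LstmNet above: 4*2 * (3 + 2 + 1) == 48 elements, matching w_np.size.
assert _packed_weight_numel(3, 2) == 48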

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_lstm():
    seq_len = 5
    batch_size = 2
    input_size = 3
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, h, c, _, _ = net()
    print(y)
    print(c)
    print(h)

    expect_y = np.array([[[-0.16709016, 0.13125697],
                          [-0.08438572, -0.01969833]],
                         [[-0.2746155, 0.32764038],
                          [-0.06504016, -0.07770399]],
                         [[-0.00140004, 0.17706314],
                          [0.03244496, -0.10135599]],
                         [[0.08328028, 0.06437367],
                          [-0.04133911, -0.11072896]],
                         [[0.19004421, -0.02852732],
                          [0.09138509, -0.00344161]]])
    error = np.ones([num_layers, batch_size, hidden_size]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_h = np.array([[[0.19004421, -0.02852732],
                          [0.09138509, -0.00344161]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_c = np.array([[[0.34533143, -0.06313794],
                          [0.169008, -0.00555446]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)
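
# Note: the paired assertions above bound |actual - expected| element-wise by
# 1e-4; an equivalent check (up to the open/closed boundary) would be
# np.testing.assert_allclose(y.asnumpy(), expect_y, rtol=0, atol=1.0e-4).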

class MultiLayerBiLstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()
        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
                            bidirectional=bidirectional, dropout=dropout)

        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],
                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],
                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],
                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],
                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)
        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='h0')
        self.c0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='c0')
        self.h1 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='h1')
        self.c1 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='c1')
        self.h = ParameterTuple((self.h0, self.h1))
        self.c = ParameterTuple((self.c0, self.c1))

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))
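
# Unlike LstmNet, which feeds the raw P.LSTM primitive a manually packed
# weight tensor, this cell uses the nn.LSTM wrapper, which creates and packs
# its own weights internally; only the input sequence and the initial (h, c)
# states per layer are supplied here.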

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
    seq_len = 5
    batch_size = 2
    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                              dropout)
    y, h, c, _, _ = net()
    print(y)
    print(h)
    print(c)

class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        # get_by_list=True: differentiate w.r.t. the listed weights;
        # sens_param=True: the caller supplies the output gradient (sensitivity).
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
                                    sens_param=True)

    @ms_function
    def construct(self, output_grad):
        weights = self.weights
        grads = self.grad(self.network, weights)(output_grad)
        return grads
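
# Usage sketch for the wrapper above (illustrative): Grad(Net(...))(Tensor(dy))
# runs Net's forward graph, treats dy as the sensitivity of its output, and
# returns one gradient per trainable Parameter. In test_grad below those
# Parameters are x, h0, c0 and w0, hence the four values unpacked there.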

class Net(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()
        num_directions = 1
        if bidirectional:
            num_directions = 2

        input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
                              [-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],
                             [[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
                              [0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],
                             [[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
                              [1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],
                             [[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
                              [-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],
                             [[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
                              [-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
                               0.5384]]]).astype(np.float32)
        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
        self.h0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='h0')
        self.c0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='c0')

        wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
                           [-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
                           [0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
                           [0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
                           [0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
                           [-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
                           [0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
                           [-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
                            -0.3757]]).astype(np.float32).reshape([1, -1])
        whh_l0 = np.array([[0.3358, 0.2790],
                           [-0.5355, 0.0989],
                           [-0.1402, 0.5120],
                           [0.1335, 0.1653],
                           [0.3533, -0.3531],
                           [0.4166, -0.4420],
                           [-0.5454, -0.1720],
                           [0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])
        bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
            np.float32).reshape([1, -1])
        # The two bias vectors are summed into a single row before packing,
        # since the flat weight holds one combined bias per gate block.
        w0_np = np.concatenate(
            (wih_l0, whh_l0, bih_l0 + bhh_l0),
            axis=1).reshape([-1, 1, 1])
        self.w0 = Parameter(initializer(Tensor(w0_np), w0_np.shape), name='w0')

        self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                           has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)

    @ms_function
    def construct(self):
        # Only the first output (the full sequence y) is returned, so the
        # gradient wrapper can pass a single sensitivity tensor for it.
        return self.lstm(self.x, self.h0, self.c0, self.w0)[0]

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_grad():
    seq_len = 5
    batch_size = 2
    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
    dy = np.array([[[-3.5471e-01, 7.0540e-01],
                    [2.7161e-01, 1.0865e+00]],
                   [[-4.2431e-01, 1.4955e+00],
                    [-4.0418e-01, -2.3282e-01]],
                   [[-1.3654e+00, 1.9251e+00],
                    [-4.6481e-01, 1.3138e+00]],
                   [[1.2914e+00, -2.3753e-01],
                    [5.3589e-01, -1.0981e-01]],
                   [[-1.6032e+00, -1.8818e-01],
                    [1.0065e-01, 9.2045e-01]]]).astype(np.float32)
    dx, dhx, dcx, dw = net(Tensor(dy))
    print(dx)
    print(dhx)
    print(dcx)
    print(dw)

# test_multi_layer_bilstm()
# test_lstm()
# tf_lstm_test()
# test_grad()
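
# To run an individual case locally with a CPU MindSpore build, a typical
# pytest invocation would be:
#
#     pytest -v test_lstm_op.py::test_lstm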