# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
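"""Tests that nn.LSTM produces consistent forward outputs and input gradients
between GRAPH_MODE and PYNATIVE_MODE on Ascend, using explicitly constructed
weight and bias parameters."""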
import math

import pytest
import numpy as np

from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.common.parameter import Parameter
from mindspore.ops import composite as c


class GradOfAllInputsAndParams(nn.Cell):
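    """Wraps `network` and returns gradients w.r.t. all inputs and all trainable parameters."""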
    def __init__(self, network, sens_param):
        super().__init__()
        self.grad = c.GradOperation(get_all=True, get_by_list=True, sens_param=sens_param)
        self.network = network
        self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        gout = self.grad(self.network, self.params)(*inputs)
        return gout


class LSTM(nn.Cell):
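    """Thin wrapper around nn.LSTM that packs the initial states into the (h0, c0) tuple."""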
    def __init__(self, input_s, hidden_s, num_layers, has_bias, batch_first, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_s, hidden_size=hidden_s, num_layers=num_layers, has_bias=has_bias,
                            batch_first=batch_first, bidirectional=bidirectional, dropout=dropout)

    def construct(self, inp, h0, c0):
        return self.lstm(inp, (h0, c0))


class LSTMWeightBias:
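    """Generates per-layer weight and bias ParameterTuples in the layout nn.LSTM expects."""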
    def __init__(self, num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional):
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.input_s = input_s
        self.num_directions = num_directions
        self.hidden_s = hidden_s
        self.bidirectional = bidirectional

    def get_weight_bias(self):
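        """Returns (w_list, b_list); each parameter covers the four gates, so every block is 4 * hidden_s wide."""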
        stdv = 1 / math.sqrt(self.hidden_s)
        gate_size = 4 * self.hidden_s
        w_list_value = []
        b_list_value = []
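        # Layer 0 consumes the raw input; deeper layers consume the previous
        # layer's output, which is num_directions * hidden_s wide.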
        for i in range(self.num_layers):
            b0 = np.zeros(gate_size, dtype=np.float16)
            w_shape = self.input_s if i == 0 else (self.num_directions * self.hidden_s)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
            w_list_value.append(Parameter(initializer(Tensor(w_np), [w_shape + self.hidden_s, gate_size]),
                                          name="weight_fw" + str(i)))
            if self.has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                b_list_value.append(Parameter(initializer(Tensor(b_np), [gate_size]), name="bias_fw" + str(i)))
            else:
                b_list_value.append(Parameter(initializer(Tensor(b0), [gate_size]), name="bias_fw" + str(i)))
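            # The backward direction gets its own weight and bias parameters.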
            if self.bidirectional:
                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
                # Backward weights belong in the weight list, not the bias list.
                w_list_value.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + self.hidden_s, gate_size]),
                                              name="weight_bw" + str(i)))
                b_bw_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16) if self.has_bias else b0
                b_list_value.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]), name="bias_bw" + str(i)))
        w_list_value = ParameterTuple(w_list_value)
        b_list_value = ParameterTuple(b_list_value)
        return w_list_value, b_list_value


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_forward_input_3_32_32_is_32_hs_16():
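    """Forward outputs (out, hy, cy) must match between graph mode and pynative mode."""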
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1
    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()
    h0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, input_s).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    out, (hy, cy) = net(input_ms, h0, c0)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    out_pynative, (hy_pynative, cy_pynative) = net_pynative(input_ms, h0, c0)

    assert np.allclose(out.asnumpy(), out_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(hy.asnumpy(), hy_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(cy.asnumpy(), cy_pynative.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_grad_input_3_32_32_is_32_hs_16():
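    """Input gradients (x_grad, h_grad, c_grad) must match between graph mode and pynative mode."""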
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1
    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()
    h0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, input_s).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    grad_net_inp = GradOfAllInputsAndParams(net, sens_param=False)
    grad_net_inp.set_train()
    out_grad, _ = grad_net_inp(input_ms, h0, c0)
    x_grad = out_grad[0].asnumpy()
    h_grad = out_grad[1].asnumpy()
    c_grad = out_grad[2].asnumpy()

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    grad_net_inp_pynative = GradOfAllInputsAndParams(net_pynative, sens_param=False)
    grad_net_inp_pynative.set_train()
    out_grad_pynative, _ = grad_net_inp_pynative(input_ms, h0, c0)
    x_grad_pynative = out_grad_pynative[0].asnumpy()
    h_grad_pynative = out_grad_pynative[1].asnumpy()
    c_grad_pynative = out_grad_pynative[2].asnumpy()

    assert np.allclose(x_grad, x_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(h_grad, h_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(c_grad, c_grad_pynative, 0.0001, 0.0001)