# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
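"""Tests that nn.LSTM produces the same forward outputs and input gradients in
graph mode and in PyNative mode on Ascend."""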
import math
import pytest
import numpy as np
from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.common.parameter import Parameter
from mindspore.ops import composite as c


class GradOfAllInputsAndParams(nn.Cell):
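    """Computes the gradients of a network w.r.t. both all of its inputs and all of its trainable parameters."""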
    def __init__(self, network, sens_param):
        super().__init__()
        self.grad = c.GradOperation(get_all=True, get_by_list=True, sens_param=sens_param)
        self.network = network
        self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        gout = self.grad(self.network, self.params)(*inputs)
        return gout


class LSTM(nn.Cell):
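    """Thin wrapper around nn.LSTM that packs the initial states (h0, c0) into the tuple the cell expects."""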
    def __init__(self, input_s, hidden_s, num_layers, has_bias, batch_first, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_s, hidden_size=hidden_s, num_layers=num_layers, has_bias=has_bias,
                            batch_first=batch_first, bidirectional=bidirectional, dropout=dropout)

    def construct(self, inp, h0, c0):
        return self.lstm(inp, (h0, c0))


class LSTMWeightBias:
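    """Builds random float16 weight and bias parameters shaped for nn.LSTM's per-layer w_list and b_list."""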
    def __init__(self, num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional):
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.input_s = input_s
        self.num_directions = num_directions
        self.hidden_s = hidden_s
        self.bidirectional = bidirectional

    def get_weight_bias(self):
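        """Returns the weights and biases as ParameterTuples, one entry per layer (and per direction if bidirectional)."""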
        stdv = 1 / math.sqrt(self.hidden_s)
        gate_size = 4 * self.hidden_s
        w_list_value = []
        b_list_value = []

        for i in range(self.num_layers):
            b0 = np.zeros(gate_size, dtype=np.float16)
            # The first layer consumes the raw input; deeper layers consume the previous layer's output.
            w_shape = self.input_s if i == 0 else (self.num_directions * self.hidden_s)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
            w_list_value.append(Parameter(initializer(Tensor(w_np), [w_shape + self.hidden_s, gate_size]),
                                          name="weight_fw" + str(i)))

            if self.has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                b_list_value.append(Parameter(initializer(Tensor(b_np), [gate_size]), name="bias_fw" + str(i)))
            else:
                b_list_value.append(Parameter(initializer(Tensor(b0), [gate_size]), name="bias_fw" + str(i)))

            if self.bidirectional:
                # Backward-direction weights go into the weight list, not the bias list.
                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
                w_list_value.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + self.hidden_s, gate_size]),
                                              name="weight_bw" + str(i)))
                if self.has_bias:
                    b_bw_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                else:
                    b_bw_np = b0
                b_list_value.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]), name="bias_bw" + str(i)))
        w_list_value = ParameterTuple(w_list_value)
        b_list_value = ParameterTuple(b_list_value)
        return w_list_value, b_list_value


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_forward_input_3_32_32_is_32_hs_16():
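    """Forward consistency: nn.LSTM outputs in graph mode and PyNative mode must match within 1e-4."""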
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    # Input is (seq_len=3, batch=32, input_size=32); states are (num_layers * num_directions, batch, hidden_size).
    h0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    out, (hy, cy) = net(input_ms, h0, c0)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    out_pynative, (hy_pynative, cy_pynative) = net_pynative(input_ms, h0, c0)

    assert np.allclose(out.asnumpy(), out_pynative.asnumpy(), rtol=1e-4, atol=1e-4)
    assert np.allclose(hy.asnumpy(), hy_pynative.asnumpy(), rtol=1e-4, atol=1e-4)
    assert np.allclose(cy.asnumpy(), cy_pynative.asnumpy(), rtol=1e-4, atol=1e-4)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_grad_input_3_32_32_is_32_hs_16():
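    """Gradient consistency: input gradients of nn.LSTM in graph mode and PyNative mode must match within 1e-4."""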
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    # Input is (seq_len=3, batch=32, input_size=32); states are (num_layers * num_directions, batch, hidden_size).
    h0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * num_directions, 32, hidden_s).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value

    grad_net_inp = GradOfAllInputsAndParams(net, sens_param=False)
    grad_net_inp.set_train()
    out_grad, _ = grad_net_inp(input_ms, h0, c0)
    x_grad = out_grad[0].asnumpy()
    h_grad = out_grad[1].asnumpy()
    c_grad = out_grad[2].asnumpy()

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias,
                        batch_first=False, bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value

    grad_net_inp_pynative = GradOfAllInputsAndParams(net_pynative, sens_param=False)
    grad_net_inp_pynative.set_train()
    out_grad_pynative, _ = grad_net_inp_pynative(input_ms, h0, c0)
    x_grad_pynative = out_grad_pynative[0].asnumpy()
    h_grad_pynative = out_grad_pynative[1].asnumpy()
    c_grad_pynative = out_grad_pynative[2].asnumpy()

    assert np.allclose(x_grad, x_grad_pynative, rtol=1e-4, atol=1e-4)
    assert np.allclose(h_grad, h_grad_pynative, rtol=1e-4, atol=1e-4)
    assert np.allclose(c_grad, c_grad_pynative, rtol=1e-4, atol=1e-4)