diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py
index f038009669..06f9d90a32 100755
--- a/mindspore/nn/layer/lstm.py
+++ b/mindspore/nn/layer/lstm.py
@@ -125,13 +125,6 @@ class LSTM(Cell):
         self.num_layers = num_layers
         self.bidirectional = bidirectional
         self.dropout = dropout
-        self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
-        self.concat = P.Concat(axis=0)
-        self.concat_2dim = P.Concat(axis=2)
-        self.cast = P.Cast()
-        self.shape = P.Shape()
-        if dropout != 0:
-            self.dropout_op = nn.Dropout(float(dropout))
         self.lstm = P.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
@@ -143,36 +136,49 @@ class LSTM(Cell):
         gate_size = 4 * hidden_size
         stdv = 1 / math.sqrt(hidden_size)
         num_directions = 2 if bidirectional else 1
-        b0 = np.zeros(gate_size, dtype=np.float16)
-        self.w_list = []
-        self.b_list = []
-        self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
-        self.rnns_bw = P.DynamicRNN(forget_bias=0.0)
-        for layer in range(num_layers):
-            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
-            increment_size = gate_size * input_layer_size
-            increment_size += gate_size * hidden_size
-            w_shape = input_size if layer == 0 else (num_directions * hidden_size)
-            w_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
-            self.w_list.append(Parameter(
-                initializer(Tensor(w_np), [w_shape + hidden_size, gate_size]), name='weight_fw' + str(layer)))
-            if has_bias:
-                increment_size += 2 * gate_size
-                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
-                self.b_list.append(Parameter(initializer(Tensor(b_np), [gate_size]), name='bias_fw' + str(layer)))
-            else:
-                self.b_list.append(Parameter(initializer(Tensor(b0), [gate_size]), name='bias_fw' + str(layer)))
-            weight_size += increment_size * num_directions
-            if bidirectional:
-                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
-                self.w_list.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + hidden_size, gate_size]),
-                                             name='weight_bw' + str(layer)))
-                b_bw_np = np.random.uniform(-stdv, stdv, (4 * hidden_size)).astype(np.float16) if has_bias else b0
-                self.b_list.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]), name='bias_bw' + str(layer)))
-        self.w_list = ParameterTuple(self.w_list)
-        self.b_list = ParameterTuple(self.b_list)
-        w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
-        self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
+        if self.is_ascend:
+            self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
+            self.concat = P.Concat(axis=0)
+            self.concat_2dim = P.Concat(axis=2)
+            self.cast = P.Cast()
+            self.shape = P.Shape()
+            if dropout != 0:
+                self.dropout_op = nn.Dropout(float(dropout))
+            b0 = np.zeros(gate_size, dtype=np.float16)
+            self.w_list = []
+            self.b_list = []
+            self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
+            self.rnns_bw = P.DynamicRNN(forget_bias=0.0)
+
+            for layer in range(num_layers):
+                w_shape = input_size if layer == 0 else (num_directions * hidden_size)
+                w_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
+                self.w_list.append(Parameter(
+                    initializer(Tensor(w_np), [w_shape + hidden_size, gate_size]), name='weight_fw' + str(layer)))
+                if has_bias:
+                    b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
+                    self.b_list.append(Parameter(initializer(Tensor(b_np), [gate_size]), name='bias_fw' + str(layer)))
+                else:
+                    self.b_list.append(Parameter(initializer(Tensor(b0), [gate_size]), name='bias_fw' + str(layer)))
+                if bidirectional:
+                    w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
+                    self.w_list.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + hidden_size, gate_size]),
+                                                 name='weight_bw' + str(layer)))
+                    b_bw_np = np.random.uniform(-stdv, stdv, (4 * hidden_size)).astype(np.float16) if has_bias else b0
+                    self.b_list.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]),
+                                                 name='bias_bw' + str(layer)))
+            self.w_list = ParameterTuple(self.w_list)
+            self.b_list = ParameterTuple(self.b_list)
+        else:
+            for layer in range(num_layers):
+                input_layer_size = input_size if layer == 0 else hidden_size * num_directions
+                increment_size = gate_size * input_layer_size
+                increment_size += gate_size * hidden_size
+                if has_bias:
+                    increment_size += 2 * gate_size
+                weight_size += increment_size * num_directions
+            w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
+            self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
 
     def _stacked_bi_dynamic_rnn(self, x, init_h, init_c, weight, bias):
         """stacked bidirectional dynamic_rnn"""
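Note on the non-Ascend branch: it packs all per-layer weights and biases into one flat fp32 `weight` parameter of shape `[weight_size, 1, 1]` for `P.LSTM`. The following is a minimal, illustrative sketch (not part of the patch) that mirrors the `weight_size` accumulation shown above, assuming the same hyperparameter names (`input_size`, `hidden_size`, `num_layers`, `has_bias`, `bidirectional`) and a hypothetical helper name:

    # Illustrative only: reproduces the weight_size accumulation from the
    # non-Ascend branch of the diff above.
    def lstm_flat_weight_size(input_size, hidden_size, num_layers, has_bias, bidirectional):
        gate_size = 4 * hidden_size
        num_directions = 2 if bidirectional else 1
        weight_size = 0
        for layer in range(num_layers):
            # input-to-hidden plus hidden-to-hidden weights for one direction
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * hidden_size
            if has_bias:
                increment_size += 2 * gate_size  # ih and hh bias vectors
            weight_size += increment_size * num_directions
        return weight_size

    # Example: 2-layer bidirectional LSTM with bias, input_size=10, hidden_size=16
    # lstm_flat_weight_size(10, 16, 2, True, True) -> 9984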