
lstm.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
  15. """lstm"""
  16. from mindspore.ops import operations as P
  17. from mindspore.nn.cell import Cell
  18. from mindspore.common.parameter import Parameter
  19. from mindspore.common.initializer import initializer
  20. from mindspore._checkparam import Validator as validator
  21. class LSTM(Cell):
  22. r"""
  23. LSTM (Long Short-Term Memory) layer.
  24. Applies a LSTM to the input.
  25. There are two pipelines connecting two consecutive cells in a LSTM model; one is cell state pipeline
  26. and another is hidden state pipeline. Denote two consecutive time nodes as :math:`t-1` and :math:`t`.
  27. Given an input :math:`x_t` at time :math:`t`, an hidden state :math:`h_{t-1}` and an cell
  28. state :math:`c_{t-1}` of the layer at time :math:`{t-1}`, the cell state and hidden state at
  29. time :math:`t` is computed using an gating mechanism. Input gate :math:`i_t` is designed to protect the cell
  30. from perturbation by irrelevant inputs. Forget gate :math:`f_t` affords protection of the cell by forgetting
  31. some information in the past, which is stored in :math:`h_{t-1}`. Output gate :math:`o_t` protects other
  32. units from perturbation by currently irrelevant memory contents. Candidate cell state :math:`\tilde{c}_t` is
  33. calculated with the current input, on which the input gate will be applied. Finally, current cell state
  34. :math:`c_{t}` and hidden state :math:`h_{t}` are computed with the calculated gates and cell states. The complete
  35. formulation is as follows.
  36. .. math::
  37. \begin{array}{ll} \\
  38. i_t = \sigma(W_{ix} x_t + b_{ix} + W_{ih} h_{(t-1)} + b_{ih}) \\
  39. f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\
  40. \tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\
  41. o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\
  42. c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\
  43. h_t = o_t * \tanh(c_t) \\
  44. \end{array}
  45. Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
  46. are learnable weights between the output and the input in the formula. For instance,
  47. :math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
  48. Details can be found in paper `LONG SHORT-TERM MEMORY
  49. <https://www.bioinf.jku.at/publications/older/2604.pdf>`_ and
  50. `Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling
  51. <https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43905.pdf>`_.
  52. Args:
  53. input_size (int): Number of features of input.
  54. hidden_size (int): Number of features of hidden layer.
  55. num_layers (int): Number of layers of stacked LSTM . Default: 1.
  56. has_bias (bool): Specifies whether has bias `b_ih` and `b_hh`. Default: True.
  57. batch_first (bool): Specifies whether the first dimension of input is batch_size. Default: False.
  58. dropout (float): If not 0, append `Dropout` layer on the outputs of each
  59. LSTM layer except the last layer. Default 0. The range of dropout is [0.0, 1.0].
  60. bidirectional (bool): Specifies whether this is a bidirectional LSTM. If set True,
  61. number of directions will be 2 otherwise number of directions is 1. Default: False.
  62. Inputs:
  63. - **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`).
  64. - **hx** (tuple) - A tuple of two Tensors (h_0, c_0) both of data type mindspore.float32 or
  65. mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
  66. Data type of `hx` should be the same of `input`.
  67. Outputs:
  68. Tuple, a tuple constains (`output`, (`h_n`, `c_n`)).
  69. - **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
  70. - **hx_n** (tuple) - A tuple of two Tensor (h_n, c_n) both of shape
  71. (num_directions * `num_layers`, batch_size, `hidden_size`).
  72. Examples:
  73. >>> class LstmNet(nn.Cell):
  74. >>> def __init__(self, input_size, hidden_size, num_layers, has_bias, batch_first, bidirectional):
  75. >>> super(LstmNet, self).__init__()
  76. >>> self.lstm = nn.LSTM(input_size=input_size,
  77. >>> hidden_size=hidden_size,
  78. >>> num_layers=num_layers,
  79. >>> has_bias=has_bias,
  80. >>> batch_first=batch_first,
  81. >>> bidirectional=bidirectional,
  82. >>> dropout=0.0)
  83. >>>
  84. >>> def construct(self, inp, h0, c0):
  85. >>> return self.lstm(inp, (h0, c0))
  86. >>>
  87. >>> net = LstmNet(10, 12, 2, has_bias=True, batch_first=True, bidirectional=False)
  88. >>> input = Tensor(np.ones([3, 5, 10]).astype(np.float32))
  89. >>> h0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
  90. >>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
  91. >>> output, (hn, cn) = net(input, h0, c0)
  92. """
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
        self.dropout = float(dropout)
        self.bidirectional = bidirectional
        if self.batch_first:
            self.transpose1 = P.Transpose()
            self.transpose2 = P.Transpose()
        self.lstm = P.LSTM(input_size=self.input_size,
                           hidden_size=self.hidden_size,
                           num_layers=self.num_layers,
                           has_bias=self.has_bias,
                           bidirectional=self.bidirectional,
                           dropout=self.dropout)
        # All layer weights are packed into one flat parameter. Per direction, each layer
        # needs an input-hidden matrix (gate_size x input_layer_size), a hidden-hidden
        # matrix (gate_size x hidden_size) and, if has_bias, two bias vectors of length
        # gate_size, where gate_size stacks the four gates (i, f, c, o).
        num_directions = 2 if self.bidirectional else 1
        weight_size = 0
        gate_size = 4 * self.hidden_size
        for layer in range(self.num_layers):
            # Layers after the first consume the (possibly bidirectional) hidden state.
            input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * self.hidden_size
            if self.has_bias:
                increment_size += 2 * gate_size
            weight_size += increment_size * num_directions
        self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
        self.fill = P.Fill()
        self.shape = P.Shape()

    def construct(self, x, hx):
        # P.LSTM expects (seq_len, batch_size, input_size); swap the batch and
        # sequence axes on the way in and out when batch_first is set.
        if self.batch_first:
            x = self.transpose1(x, (1, 0, 2))
        h0, c0 = hx
        output, hn, cn, _, _ = self.lstm(x, h0, c0, self.weight)
        if self.batch_first:
            output = self.transpose2(output, (1, 0, 2))
        return (output, (hn, cn))
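
For readers tracing the docstring equations into code, the following is a minimal NumPy sketch of a single LSTM cell update. It is purely illustrative: the function and variable names are invented for this sketch, the row-wise i/f/c/o gate ordering is an assumption, and it does not reproduce the actual packed memory layout that P.LSTM uses for the flat `weight` parameter above.

# Hypothetical illustration, not part of lstm.py: one LSTM time step in NumPy,
# following the equations in the class docstring. Shapes use input_size=10 and
# hidden_size=12 from the docstring example.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_prev, c_prev, w_x, w_h, b_x, b_h, hidden_size):
    """One cell update; w_x/w_h stack the i, f, c, o gates row-wise (4 * hidden_size rows)."""
    gates = w_x @ x_t + b_x + w_h @ h_prev + b_h
    i_t = sigmoid(gates[0 * hidden_size:1 * hidden_size])      # input gate
    f_t = sigmoid(gates[1 * hidden_size:2 * hidden_size])      # forget gate
    c_tilde = np.tanh(gates[2 * hidden_size:3 * hidden_size])  # candidate cell state
    o_t = sigmoid(gates[3 * hidden_size:4 * hidden_size])      # output gate
    c_t = f_t * c_prev + i_t * c_tilde                         # new cell state
    h_t = o_t * np.tanh(c_t)                                   # new hidden state
    return h_t, c_t

input_size, hidden_size = 10, 12
rng = np.random.default_rng(0)
w_x = rng.standard_normal((4 * hidden_size, input_size)).astype(np.float32)
w_h = rng.standard_normal((4 * hidden_size, hidden_size)).astype(np.float32)
b_x = np.zeros(4 * hidden_size, np.float32)
b_h = np.zeros(4 * hidden_size, np.float32)
h = np.zeros(hidden_size, np.float32)
c = np.zeros(hidden_size, np.float32)
x_t = rng.standard_normal(input_size).astype(np.float32)
h, c = lstm_step(x_t, h, c, w_x, w_h, b_x, b_h, hidden_size)

# Sanity check: these matrices and biases together have exactly the element count
# that the weight_size loop in __init__ allocates for layer 0 of a unidirectional,
# biased LSTM: gate_size * input_size + gate_size * hidden_size + 2 * gate_size.
assert w_x.size + w_h.size + b_x.size + b_h.size == 4 * hidden_size * (input_size + hidden_size + 2)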