
test_hybird_parallel_activation.py

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
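"""Unit tests for semi-auto-parallel sharding of MatMul followed by activation
operators (Tanh, ReLU, Softmax, LogSoftmax, Gelu). Each test assigns explicit
sharding strategies to the operators, configures the auto-parallel context,
and compiles the wrapped network."""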
import numpy as np
import mindspore as ms
from mindspore import context
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore import Tensor
from tests.ut.python.ops.test_math_ops import VirtualLoss
from mindspore.common.api import _executor
from mindspore.ops import composite as C


class NetWithLoss(nn.Cell):
    """Wraps a network with a virtual loss so its output can be differentiated."""
    def __init__(self, network):
        super(NetWithLoss, self).__init__()
        self.loss = VirtualLoss()
        self.network = network

    def construct(self, x, y, b):
        predict = self.network(x, y, b)
        return self.loss(predict)


class GradWrap(nn.Cell):
    """Computes gradients of the wrapped network with respect to all inputs."""
    def __init__(self, network):
        super(GradWrap, self).__init__()
        self.network = network

    def construct(self, x, y, b):
        return C.grad_all(self.network)(x, y, b)


def compile(net, x, y, b):
    """Marks the network for auto-parallel execution and compiles it with the given inputs."""
    net.set_auto_parallel()
    _executor.compile(net, x, y, b)
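

# Each test below builds a small MatMul + activation network with explicit
# sharding strategies, configures the semi_auto_parallel context for a given
# device_num, and verifies that the wrapped network compiles.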


def test_matmul_tanh():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.tanh = P.Tanh().set_strategy(strategy3)

        def construct(self, x, y, b):
            out = self.tanh(self.matmul1(x, y))
            out = self.matmul2(out, b)
            return out

    strategy1 = ((16, 1), (1, 1))
    strategy2 = ((1, 1), (1, 16))
    strategy3 = ((4, 4), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=16, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)


def test_matmul_activation():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.activation = P.ReLU().set_strategy(strategy3)

        def construct(self, x, y, b):
            out = self.activation(self.matmul1(x, y))
            out = self.matmul2(out, b)
            return out

    strategy1 = ((16, 1), (1, 1))
    strategy2 = ((1, 1), (1, 16))
    strategy3 = ((4, 4), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=16, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)


def test_matmul_softmax():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.softmax = P.Softmax().set_strategy(strategy3)

        def construct(self, x, y, b):
            out = self.softmax(self.matmul1(x, y))
            out = self.matmul2(out, b)
            return out

    strategy1 = ((16, 1), (1, 1))
    strategy2 = ((1, 1), (1, 16))
    strategy3 = ((16, 1), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=16, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)


def test_matmul_logsoftmax():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.logsoftmax = P.LogSoftmax().set_strategy(strategy3)

        def construct(self, x, y, b):
            out = self.logsoftmax(self.matmul1(x, y))
            out = self.matmul2(out, b)
            return out

    strategy1 = ((4, 2), (2, 2))
    strategy2 = ((2, 4), (4, 2))
    strategy3 = ((16, 1), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=16, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)


def test_activations():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.gelu = P.Gelu().set_strategy(strategy3)
            self.tanh = P.Tanh().set_strategy(strategy3)
            self.softmax = P.Softmax().set_strategy(strategy3)
            self.logsoftmax = P.LogSoftmax().set_strategy(strategy3)

        def construct(self, x, y, b):
            out = self.gelu(self.tanh(self.matmul1(x, y)))
            out = self.logsoftmax(self.softmax(self.matmul2(out, b)))
            return out

    strategy1 = ((1, 2), (2, 2))
    strategy2 = ((2, 2), (2, 1))
    strategy3 = ((4, 1), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=4, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)


def test_activations_repeated_calculation():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.gelu = P.Gelu().set_strategy(strategy3)
            self.tanh = P.Tanh().set_strategy(strategy4)
            self.softmax = P.Softmax().set_strategy(strategy5)
            self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)

        def construct(self, x, y, b):
            out = self.gelu(self.tanh(self.matmul1(x, y)))
            out = self.logsoftmax(self.softmax(self.matmul2(out, b)))
            return out

    strategy1 = ((2, 4), (4, 8))
    strategy2 = ((2, 2), (2, 1))
    strategy3 = ((2, 1), )
    strategy4 = ((2, 2), )
    strategy5 = ((4, 1), )
    strategy6 = ((8, 1), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3, strategy4, strategy5, strategy6)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=64, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)


def test_activations_axis_tuple():
    class Net(nn.Cell):
        def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
            super().__init__()
            self.matmul1 = P.MatMul().set_strategy(strategy1)
            self.matmul2 = P.MatMul().set_strategy(strategy2)
            self.gelu = P.Gelu().set_strategy(strategy3)
            self.tanh = P.Tanh().set_strategy(strategy4)
            self.softmax = P.Softmax(axis=(0, 1)).set_strategy(strategy5)
            self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)

        def construct(self, x, y, b):
            out = self.gelu(self.tanh(self.matmul1(x, y)))
            out = self.logsoftmax(self.softmax(self.matmul2(out, b)))
            return out

    strategy1 = ((2, 4), (4, 8))
    strategy2 = ((2, 2), (2, 1))
    strategy3 = ((2, 1), )
    strategy4 = ((2, 2), )
    strategy5 = ((1, 1), )
    strategy6 = ((8, 1), )
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3, strategy4, strategy5, strategy6)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    context.set_auto_parallel_context(device_num=64, global_rank=0)

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    compile(net, x, y, b)