You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_loss_and_optimizer.py 7.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. from mindspore import context
  16. import mindspore.nn as nn
  17. from mindspore.ops import operations as P
  18. from mindspore import Tensor, Parameter
  19. import mindspore as ms
  20. from mindspore.common.api import _executor
  21. from mindspore.ops import composite as C
  22. from mindspore.nn.optim import Momentum, LARS
  23. from mindspore.nn import TrainOneStepCell, WithLossCell
  24. class NetWithLoss(nn.Cell):
  25. def __init__(self, network, strategy3):
  26. super(NetWithLoss, self).__init__()
  27. self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
  28. self.network = network
  29. def construct(self, x, b):
  30. predict = self.network(x)
  31. return self.loss(predict, b)[0]
  32. def test_momentum():
  33. class Net(nn.Cell):
  34. def __init__(self, strategy1, strategy2, weight):
  35. super().__init__()
  36. self.weight = Parameter(weight, "w1")
  37. self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
  38. self.relu = P.ReLU().set_strategy(strategy2)
  39. def construct(self, x):
  40. out = self.matmul(x, self.weight)
  41. out = self.relu(out)
  42. return out
  43. context.set_auto_parallel_context(device_num=4, global_rank=0)
  44. strategy1 = ((2, 1), (2, 1))
  45. strategy2 = ((4, 1), )
  46. strategy3 = ((4, 1), (4, 1))
  47. x = Tensor(np.ones([64, 32]), dtype=ms.float32)
  48. weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
  49. b = Tensor(np.ones([64, 64]), dtype=ms.float32)
  50. net = Net(strategy1, strategy2, weight)
  51. optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
  52. net_with_loss = NetWithLoss(net, strategy3)
  53. train_net = TrainOneStepCell(net_with_loss, optimizer)
  54. context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
  55. _executor.compile(train_net, x, b)
  56. def test_momentum_with_loss_scale():
  57. class Net(nn.Cell):
  58. def __init__(self, strategy1, strategy2, weight):
  59. super().__init__()
  60. self.weight = Parameter(weight, "w1")
  61. self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
  62. self.relu = P.ReLU().set_strategy(strategy2)
  63. def construct(self, x):
  64. out = self.matmul(x, self.weight)
  65. out = self.relu(out)
  66. return out
  67. context.set_auto_parallel_context(device_num=4, global_rank=0)
  68. strategy1 = ((2, 1), (2, 1))
  69. strategy2 = ((4, 1), )
  70. strategy3 = ((4, 1), (4, 1))
  71. x = Tensor(np.ones([64, 32]), dtype=ms.float32)
  72. weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
  73. b = Tensor(np.ones([64, 64]), dtype=ms.float32)
  74. net = Net(strategy1, strategy2, weight)
  75. optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9, loss_scale=0.5)
  76. net_with_loss = NetWithLoss(net, strategy3)
  77. train_net = TrainOneStepCell(net_with_loss, optimizer)
  78. context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
  79. _executor.compile(train_net, x, b)
  80. def test_momentum_with_dynamic_lr():
  81. class Net(nn.Cell):
  82. def __init__(self, strategy1, strategy2, weight):
  83. super().__init__()
  84. self.weight = Parameter(weight, "w1")
  85. self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
  86. self.relu = P.ReLU().set_strategy(strategy2)
  87. def construct(self, x):
  88. out = self.matmul(x, self.weight)
  89. out = self.relu(out)
  90. return out
  91. context.set_auto_parallel_context(device_num=4, global_rank=0)
  92. strategy1 = ((2, 1), (2, 1))
  93. strategy2 = ((4, 1), )
  94. strategy3 = ((4, 1), (4, 1))
  95. x = Tensor(np.ones([64, 32]), dtype=ms.float32)
  96. weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
  97. b = Tensor(np.ones([64, 64]), dtype=ms.float32)
  98. net = Net(strategy1, strategy2, weight)
  99. lr = Tensor(np.ones([6]), dtype=ms.float32)
  100. optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)
  101. net_with_loss = NetWithLoss(net, strategy3)
  102. train_net = TrainOneStepCell(net_with_loss, optimizer)
  103. context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
  104. _executor.compile(train_net, x, b)
  105. def test_momentum_with_loss_scale_and_dynamic_lr():
  106. class Net(nn.Cell):
  107. def __init__(self, strategy1, strategy2, weight):
  108. super().__init__()
  109. self.weight = Parameter(weight, "w1")
  110. self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
  111. self.relu = P.ReLU().set_strategy(strategy2)
  112. def construct(self, x):
  113. out = self.matmul(x, self.weight)
  114. out = self.relu(out)
  115. return out
  116. context.set_auto_parallel_context(device_num=4, global_rank=0)
  117. strategy1 = ((2, 1), (2, 1))
  118. strategy2 = ((4, 1), )
  119. strategy3 = ((4, 1), (4, 1))
  120. x = Tensor(np.ones([64, 32]), dtype=ms.float32)
  121. weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
  122. b = Tensor(np.ones([64, 64]), dtype=ms.float32)
  123. net = Net(strategy1, strategy2, weight)
  124. lr = Tensor(np.ones([6]), dtype=ms.float32)
  125. optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9, loss_scale=0.5)
  126. net_with_loss = NetWithLoss(net, strategy3)
  127. train_net = TrainOneStepCell(net_with_loss, optimizer)
  128. context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
  129. _executor.compile(train_net, x, b)
  130. def test_lars():
  131. class Net(nn.Cell):
  132. def __init__(self, strategy1, strategy2, weight):
  133. super().__init__()
  134. self.weight = Parameter(weight, "w1")
  135. self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
  136. self.relu = P.ReLU().set_strategy(strategy2)
  137. def construct(self, x):
  138. out = self.matmul(x, self.weight)
  139. out = self.relu(out)
  140. return out
  141. context.set_auto_parallel_context(device_num=4, global_rank=0)
  142. strategy1 = ((2, 1), (2, 1))
  143. strategy2 = ((4, 1), )
  144. strategy3 = ((4, 1), (4, 1))
  145. x = Tensor(np.ones([64, 32]), dtype=ms.float32)
  146. weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
  147. b = Tensor(np.ones([64, 64]), dtype=ms.float32)
  148. net = Net(strategy1, strategy2, weight)
  149. lr = Tensor(np.ones([6]), dtype=ms.float32)
  150. SGD = Momentum(net.trainable_params(), lr, 0.9)
  151. optimizer = LARS(SGD, epsilon=1e-08, hyperpara=0.02, decay_filter=lambda x: 'bn' not in x.name,
  152. lars_filter=lambda x: 'bn' not in x.name)
  153. net_with_loss = NetWithLoss(net, strategy3)
  154. train_net = TrainOneStepCell(net_with_loss, optimizer)
  155. context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
  156. _executor.compile(train_net, x, b)