You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_adam.py 8.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """ test adam """
  16. import numpy as np
  17. import pytest
  18. import mindspore.nn as nn
  19. from mindspore import Tensor, Parameter, context
  20. from mindspore.common.api import _executor
  21. from mindspore.nn import TrainOneStepCell, WithLossCell
  22. from mindspore.nn.optim import Adam, AdamWeightDecay
  23. from mindspore.ops import operations as P
  24. @pytest.fixture(scope="module", autouse=True)
  25. def setup_teardown():
  26. context.set_context(enable_sparse=True)
  27. yield
  28. context.set_context(enable_sparse=False)
  29. class Net(nn.Cell):
  30. """ Net definition """
  31. def __init__(self):
  32. super(Net, self).__init__()
  33. self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
  34. self.bias = Parameter(Tensor(np.ones([10]).astype((np.float32))), name="bias")
  35. self.matmul = P.MatMul()
  36. self.biasAdd = P.BiasAdd()
  37. def construct(self, x):
  38. x = self.biasAdd(self.matmul(x, self.weight), self.bias)
  39. return x
  40. class NetWithoutWeight(nn.Cell):
  41. def __init__(self):
  42. super(NetWithoutWeight, self).__init__()
  43. self.matmul = P.MatMul()
  44. def construct(self, x):
  45. x = self.matmul(x, x)
  46. return x
  47. class NetWithSparseGatherV2(nn.Cell):
  48. """ NetWithSparseGatherV2 definition """
  49. def __init__(self):
  50. super(NetWithSparseGatherV2, self).__init__()
  51. self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
  52. self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
  53. self.axis = 0
  54. self.gather = P.SparseGatherV2()
  55. def construct(self, indices, label):
  56. return self.gather(self.weight1, indices, self.axis) + self.weight2
  57. def test_adamwithoutparam():
  58. net = NetWithoutWeight()
  59. net.set_train()
  60. with pytest.raises(ValueError, match=r"Optimizer got an empty parameter list"):
  61. AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
  62. def test_adamw_compile():
  63. """ test_adamw_compile """
  64. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  65. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  66. net = Net()
  67. net.set_train()
  68. loss = nn.SoftmaxCrossEntropyWithLogits()
  69. optimizer = AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
  70. net_with_loss = WithLossCell(net, loss)
  71. train_network = TrainOneStepCell(net_with_loss, optimizer)
  72. _executor.compile(train_network, inputs, label)
  73. def test_adam_compile():
  74. """ test adam compile """
  75. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  76. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  77. net = Net()
  78. net.set_train()
  79. loss = nn.SoftmaxCrossEntropyWithLogits()
  80. optimizer = Adam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9)
  81. net_with_loss = WithLossCell(net, loss)
  82. train_network = TrainOneStepCell(net_with_loss, optimizer)
  83. _executor.compile(train_network, inputs, label)
  84. def test_sparse_adam_compile():
  85. """ test_sparse_adam_compile """
  86. indices = Tensor(np.array([0, 1]).astype(np.int32))
  87. label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
  88. net = NetWithSparseGatherV2()
  89. net.set_train()
  90. optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
  91. optimizer.target = 'CPU'
  92. train_network = TrainOneStepCell(net, optimizer)
  93. _executor.compile(train_network, indices, label)
  94. def test_sparse_adam():
  95. """ test_sparse_adam """
  96. indices = Tensor(np.array([0, 1]).astype(np.int32))
  97. label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
  98. net = NetWithSparseGatherV2()
  99. net.set_train()
  100. optimizer = Adam(net.trainable_params(), learning_rate=0.1, loss_scale=1024.0, weight_decay=0.9)
  101. train_network = TrainOneStepCell(net, optimizer)
  102. _executor.compile(train_network, indices, label)
  103. def test_adam_group1():
  104. """ test_adam_group_lr_and_weight_decay """
  105. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  106. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  107. net = Net()
  108. net.set_train()
  109. loss = nn.SoftmaxCrossEntropyWithLogits()
  110. net_with_loss = WithLossCell(net, loss)
  111. all_params = net.trainable_params()
  112. poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)
  113. group_params = [{'params': [all_params[0]], 'lr': poly_decay_lr, 'weight_decay': 0.9},
  114. {'params': [all_params[1]]}]
  115. optimizer = nn.Adam(group_params, learning_rate=0.1)
  116. train_network = TrainOneStepCell(net_with_loss, optimizer)
  117. _executor.compile(train_network, inputs, label)
  118. def test_adam_group2():
  119. """ test_adam_group_lr_and_weight_decay """
  120. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  121. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  122. net = Net()
  123. net.set_train()
  124. loss = nn.SoftmaxCrossEntropyWithLogits()
  125. net_with_loss = WithLossCell(net, loss)
  126. all_params = net.trainable_params()
  127. schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
  128. group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
  129. {'params': [all_params[1]]}]
  130. optimizer = nn.Adam(group_params, learning_rate=schedule_lr)
  131. train_network = TrainOneStepCell(net_with_loss, optimizer)
  132. _executor.compile(train_network, inputs, label)
  133. def test_adamweightdecay_group():
  134. """ test_adam_group_lr_and_weight_decay """
  135. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  136. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  137. net = Net()
  138. net.set_train()
  139. loss = nn.SoftmaxCrossEntropyWithLogits()
  140. net_with_loss = WithLossCell(net, loss)
  141. all_params = net.trainable_params()
  142. schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
  143. group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
  144. {'params': [all_params[1]]}]
  145. optimizer = nn.AdamWeightDecay(group_params, learning_rate=schedule_lr)
  146. train_network = TrainOneStepCell(net_with_loss, optimizer)
  147. _executor.compile(train_network, inputs, label)
  148. def test_adamoffload_group():
  149. """ test_adam_group_lr_and_weight_decay """
  150. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  151. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  152. net = Net()
  153. net.set_train()
  154. loss = nn.SoftmaxCrossEntropyWithLogits()
  155. net_with_loss = WithLossCell(net, loss)
  156. all_params = net.trainable_params()
  157. schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
  158. group_params = [{'params': [all_params[0]], 'lr': 0.02, 'weight_decay': 0.9},
  159. {'params': [all_params[1]]}]
  160. optimizer = nn.AdamOffload(group_params, learning_rate=schedule_lr)
  161. train_network = TrainOneStepCell(net_with_loss, optimizer)
  162. _executor.compile(train_network, inputs, label)
  163. def test_AdamWeightDecay_beta1():
  164. net = Net()
  165. print("**********", net.get_parameters())
  166. with pytest.raises(ValueError):
  167. AdamWeightDecay(net.get_parameters(), beta1=1.0, learning_rate=0.1)
  168. def test_AdamWeightDecay_beta2():
  169. net = Net()
  170. with pytest.raises(ValueError):
  171. AdamWeightDecay(net.get_parameters(), beta2=1.0, learning_rate=0.1)
  172. def test_AdamWeightDecay_e():
  173. net = Net()
  174. with pytest.raises(ValueError):
  175. AdamWeightDecay(net.get_parameters(), eps=-0.1, learning_rate=0.1)
  176. def test_adam_mindspore_with_empty_params():
  177. net = nn.Flatten()
  178. with pytest.raises(ValueError, match=r"Optimizer got an empty parameter list"):
  179. AdamWeightDecay(net.get_parameters())