You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_adam.py 7.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """ test adam """
  16. import numpy as np
  17. import pytest
  18. import mindspore.nn as nn
  19. from mindspore import Tensor, Parameter, context
  20. from mindspore.common.api import _executor
  21. from mindspore.nn import TrainOneStepCell, WithLossCell
  22. from mindspore.nn.optim import Adam, AdamWeightDecay
  23. from mindspore.ops import operations as P
  24. import mindspore.nn.learning_rate_schedule as lr_schedules
  25. from mindspore.nn.dynamic_lr import polynomial_decay_lr
  # Switch on the `enable_sparse` option of the MindSpore context at import
  # time, i.e. before any test in this module runs (presumably needed by the
  # SparseGatherV2-based tests in this file -- TODO confirm).
  26. context.set_context(enable_sparse=True)
  class Net(nn.Cell):
      """Small network: MatMul against a [64, 10] weight, then BiasAdd."""

      def __init__(self):
          super().__init__()
          weight_init = np.ones([64, 10], dtype=np.float32)
          bias_init = np.ones([10], dtype=np.float32)
          # Parameters are created weight-first, then bias; the grouped-params
          # tests index into trainable_params() by position.
          self.weight = Parameter(Tensor(weight_init), name="weight")
          self.bias = Parameter(Tensor(bias_init), name="bias")
          self.matmul = P.MatMul()
          self.biasAdd = P.BiasAdd()

      def construct(self, x):
          """Multiply ``x`` by the weight and add the bias."""
          projected = self.matmul(x, self.weight)
          return self.biasAdd(projected, self.bias)
  class NetWithoutWeight(nn.Cell):
      """Network that declares no Parameter; it only multiplies its input by itself."""

      def __init__(self):
          super().__init__()
          self.matmul = P.MatMul()

      def construct(self, x):
          """Return MatMul(x, x)."""
          return self.matmul(x, x)
  class NetWithSparseGatherV2(nn.Cell):
      """Network that gathers rows of ``weight1`` with SparseGatherV2 and adds ``weight2``."""

      def __init__(self):
          super().__init__()
          first_init = np.ones([3, 1, 2], dtype=np.float32)
          second_init = np.ones([2, 1, 2], dtype=np.float32)
          self.weight1 = Parameter(Tensor(first_init), name="weight1")
          self.weight2 = Parameter(Tensor(second_init), name="weight2")
          self.axis = 0
          self.gather = P.SparseGatherV2()

      def construct(self, indices, label):
          """Gather from ``weight1`` along ``axis`` and add ``weight2``.

          ``label`` is accepted but not used by the computation.
          """
          gathered = self.gather(self.weight1, indices, self.axis)
          return gathered + self.weight2
  def test_adamwithoutparam():
      """AdamWeightDecay must raise ValueError for a net with no trainable params."""
      weightless_net = NetWithoutWeight()
      weightless_net.set_train()
      expected_message = r"Optimizer got an empty parameter list"
      with pytest.raises(ValueError, match=expected_message):
          AdamWeightDecay(weightless_net.trainable_params(), learning_rate=0.1)
  def test_adamw_compile():
      """Compile one training step of Net using AdamWeightDecay."""
      net = Net()
      net.set_train()
      criterion = nn.SoftmaxCrossEntropyWithLogits()
      adamw = AdamWeightDecay(net.trainable_params(), learning_rate=0.1)
      step_cell = TrainOneStepCell(WithLossCell(net, criterion), adamw)

      # Inputs are built last; they are only needed for compilation.
      features = Tensor(np.ones([1, 64], dtype=np.float32))
      targets = Tensor(np.zeros([1, 10], dtype=np.float32))
      _executor.compile(step_cell, features, targets)
  def test_adam_compile():
      """Compile one training step of Net using Adam with weight decay 0.9."""
      net = Net()
      net.set_train()
      criterion = nn.SoftmaxCrossEntropyWithLogits()
      adam = Adam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9)
      step_cell = TrainOneStepCell(WithLossCell(net, criterion), adam)

      features = Tensor(np.ones([1, 64], dtype=np.float32))
      targets = Tensor(np.zeros([1, 10], dtype=np.float32))
      _executor.compile(step_cell, features, targets)
  def test_sparse_adam_compile():
      """Compile the sparse-gather net with Adam whose target is set to 'CPU'."""
      sparse_net = NetWithSparseGatherV2()
      sparse_net.set_train()

      adam = Adam(
          sparse_net.trainable_params(),
          learning_rate=0.1,
          loss_scale=1024.0,
          weight_decay=0.9,
      )
      adam.target = 'CPU'
      step_cell = TrainOneStepCell(sparse_net, adam)

      gather_indices = Tensor(np.array([0, 1]).astype(np.int32))
      targets = Tensor(np.zeros([2, 1, 2], dtype=np.float32))
      _executor.compile(step_cell, gather_indices, targets)
  def test_sparse_adam():
      """Compile the sparse-gather net with Adam, leaving the optimizer target untouched."""
      sparse_net = NetWithSparseGatherV2()
      sparse_net.set_train()

      adam = Adam(
          sparse_net.trainable_params(),
          learning_rate=0.1,
          loss_scale=1024.0,
          weight_decay=0.9,
      )
      step_cell = TrainOneStepCell(sparse_net, adam)

      gather_indices = Tensor(np.array([0, 1]).astype(np.int32))
      targets = Tensor(np.zeros([2, 1, 2], dtype=np.float32))
      _executor.compile(step_cell, gather_indices, targets)
  def test_adam_group1():
      """Compile nn.Adam with grouped params where one group has a list-based lr.

      The first group carries a polynomial-decay learning-rate list and a weight
      decay of 0.9; the second group sets no per-group options. The optimizer
      itself is created with learning_rate=0.1.
      """
      net = Net()
      net.set_train()
      loss_cell = WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits())

      weight_param, bias_param = net.trainable_params()[:2]
      decayed_lr = polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)
      first_group = {'params': [weight_param], 'lr': decayed_lr, 'weight_decay': 0.9}
      second_group = {'params': [bias_param]}

      adam = nn.Adam([first_group, second_group], learning_rate=0.1)
      step_cell = TrainOneStepCell(loss_cell, adam)

      features = Tensor(np.ones([1, 64], dtype=np.float32))
      targets = Tensor(np.zeros([1, 10], dtype=np.float32))
      _executor.compile(step_cell, features, targets)
  def test_adam_group2():
      """Compile nn.Adam with grouped params and a schedule-cell learning rate.

      The first group sets a fixed lr of 0.02 and a weight decay of 0.9; the
      second group sets no per-group options. The optimizer itself is created
      with a PolynomialDecayLR schedule as its learning rate.
      """
      net = Net()
      net.set_train()
      loss_cell = WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits())

      weight_param, bias_param = net.trainable_params()[:2]
      lr_cell = lr_schedules.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
      first_group = {'params': [weight_param], 'lr': 0.02, 'weight_decay': 0.9}
      second_group = {'params': [bias_param]}

      adam = nn.Adam([first_group, second_group], learning_rate=lr_cell)
      step_cell = TrainOneStepCell(loss_cell, adam)

      features = Tensor(np.ones([1, 64], dtype=np.float32))
      targets = Tensor(np.zeros([1, 10], dtype=np.float32))
      _executor.compile(step_cell, features, targets)
  def test_adamweightdecay_group():
      """Compile nn.AdamWeightDecay with grouped params and a schedule-cell lr.

      The first group sets a fixed lr of 0.02 and a weight decay of 0.9; the
      second group sets no per-group options. The optimizer itself is created
      with a PolynomialDecayLR schedule as its learning rate.
      """
      net = Net()
      net.set_train()
      loss_cell = WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits())

      weight_param, bias_param = net.trainable_params()[:2]
      lr_cell = lr_schedules.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
      first_group = {'params': [weight_param], 'lr': 0.02, 'weight_decay': 0.9}
      second_group = {'params': [bias_param]}

      adamw = nn.AdamWeightDecay([first_group, second_group], learning_rate=lr_cell)
      step_cell = TrainOneStepCell(loss_cell, adamw)

      features = Tensor(np.ones([1, 64], dtype=np.float32))
      targets = Tensor(np.zeros([1, 10], dtype=np.float32))
      _executor.compile(step_cell, features, targets)
  def test_AdamWeightDecay_beta1():
      """AdamWeightDecay must raise ValueError when constructed with beta1=1.0."""
      net = Net()
      # The leftover debug print of net.get_parameters() was dropped: it asserted
      # nothing and only added noise to the test output.
      with pytest.raises(ValueError):
          AdamWeightDecay(net.get_parameters(), beta1=1.0, learning_rate=0.1)
  def test_AdamWeightDecay_beta2():
      """AdamWeightDecay must raise ValueError when constructed with beta2=1.0."""
      model = Net()
      optimizer_kwargs = dict(beta2=1.0, learning_rate=0.1)
      with pytest.raises(ValueError):
          AdamWeightDecay(model.get_parameters(), **optimizer_kwargs)
  def test_AdamWeightDecay_e():
      """AdamWeightDecay must raise ValueError when constructed with eps=-0.1."""
      model = Net()
      optimizer_kwargs = dict(eps=-0.1, learning_rate=0.1)
      with pytest.raises(ValueError):
          AdamWeightDecay(model.get_parameters(), **optimizer_kwargs)
  def test_adam_mindspore_with_empty_params():
      """AdamWeightDecay must raise ValueError when given nn.Flatten's parameters."""
      flatten_cell = nn.Flatten()
      expected_message = r"Optimizer got an empty parameter list"
      with pytest.raises(ValueError, match=expected_message):
          AdamWeightDecay(flatten_cell.get_parameters())