You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_adafactor.py 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """ test adafactor """
  16. import numpy as np
  17. import pytest
  18. import mindspore.nn as nn
  19. from mindspore import Tensor, Parameter, context
  20. from mindspore.common.api import _executor
  21. from mindspore.nn import TrainOneStepCell, WithLossCell
  22. from mindspore.nn.optim.adafactor import AdaFactor
  23. from mindspore.ops import operations as P
  24. @pytest.fixture(scope="module", autouse=True)
  25. def setup_teardown():
  26. context.set_context(enable_sparse=True)
  27. yield
  28. context.set_context(enable_sparse=False)
  29. class Net(nn.Cell):
  30. """ Net definition """
  31. def __init__(self):
  32. super(Net, self).__init__()
  33. self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
  34. self.bias = Parameter(Tensor(np.ones([10]).astype((np.float32))), name="bias")
  35. self.matmul = P.MatMul()
  36. self.biasAdd = P.BiasAdd()
  37. def construct(self, x):
  38. x = self.biasAdd(self.matmul(x, self.weight), self.bias)
  39. return x
  40. class NetWithoutWeight(nn.Cell):
  41. def __init__(self):
  42. super(NetWithoutWeight, self).__init__()
  43. self.matmul = P.MatMul()
  44. def construct(self, x):
  45. x = self.matmul(x, x)
  46. return x
  47. class NetWithSparseGatherV2(nn.Cell):
  48. """ NetWithSparseGatherV2 definition """
  49. def __init__(self):
  50. super(NetWithSparseGatherV2, self).__init__()
  51. self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
  52. self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
  53. self.axis = 0
  54. self.gather = P.SparseGatherV2()
  55. def construct(self, indices, label):
  56. return self.gather(self.weight1, indices, self.axis) + self.weight2
  57. def test_adafactor_compile1():
  58. """ test adafactor compile """
  59. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  60. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  61. net = Net()
  62. net.set_train()
  63. loss = nn.SoftmaxCrossEntropyWithLogits()
  64. optimizer = AdaFactor(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, relative_step=False)
  65. net_with_loss = WithLossCell(net, loss)
  66. train_network = TrainOneStepCell(net_with_loss, optimizer)
  67. _executor.compile(train_network, inputs, label)
  68. def test_adafactor_compile2():
  69. """ test adafactor compile """
  70. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  71. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  72. net = Net()
  73. net.set_train()
  74. loss = nn.SoftmaxCrossEntropyWithLogits()
  75. optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9)
  76. net_with_loss = WithLossCell(net, loss)
  77. train_network = TrainOneStepCell(net_with_loss, optimizer)
  78. _executor.compile(train_network, inputs, label)
  79. def test_adafactor_compile3():
  80. """ test adafactor compile """
  81. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  82. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  83. net = Net()
  84. net.set_train()
  85. loss = nn.SoftmaxCrossEntropyWithLogits()
  86. optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
  87. scale_parameter=True, relative_step=True,
  88. warmup_init=False, compression=False)
  89. net_with_loss = WithLossCell(net, loss)
  90. train_network = TrainOneStepCell(net_with_loss, optimizer)
  91. _executor.compile(train_network, inputs, label)
  92. def test_adafactor_compile4():
  93. """ test adafactor compile """
  94. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  95. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  96. net = Net()
  97. net.set_train()
  98. loss = nn.SoftmaxCrossEntropyWithLogits()
  99. scale_parameter = False
  100. relative_step = True
  101. warmup_init = False
  102. compression = False
  103. optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
  104. scale_parameter=scale_parameter, relative_step=relative_step,
  105. warmup_init=warmup_init, compression=compression)
  106. net_with_loss = WithLossCell(net, loss)
  107. train_network = TrainOneStepCell(net_with_loss, optimizer)
  108. _executor.compile(train_network, inputs, label)
  109. def test_adafactor_compile5():
  110. """ test adafactor compile """
  111. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  112. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  113. net = Net()
  114. net.set_train()
  115. loss = nn.SoftmaxCrossEntropyWithLogits()
  116. scale_parameter = False
  117. relative_step = True
  118. warmup_init = True
  119. compression = True
  120. optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
  121. scale_parameter=scale_parameter, relative_step=relative_step,
  122. warmup_init=warmup_init, compression=compression)
  123. net_with_loss = WithLossCell(net, loss)
  124. train_network = TrainOneStepCell(net_with_loss, optimizer)
  125. _executor.compile(train_network, inputs, label)
  126. def test_adafactor_compile6():
  127. """ test adafactor compile """
  128. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  129. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  130. net = Net()
  131. net.set_train()
  132. loss = nn.SoftmaxCrossEntropyWithLogits()
  133. scale_parameter = True
  134. relative_step = True
  135. warmup_init = True
  136. compression = True
  137. optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
  138. scale_parameter=scale_parameter, relative_step=relative_step,
  139. warmup_init=warmup_init, compression=compression)
  140. net_with_loss = WithLossCell(net, loss)
  141. train_network = TrainOneStepCell(net_with_loss, optimizer)
  142. _executor.compile(train_network, inputs, label)
  143. def test_adafactor_group1():
  144. """ test_adafactor_group_lr_and_weight_decay """
  145. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  146. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  147. net = Net()
  148. net.set_train()
  149. loss = nn.SoftmaxCrossEntropyWithLogits()
  150. net_with_loss = WithLossCell(net, loss)
  151. all_params = net.trainable_params()
  152. poly_decay_lr = nn.polynomial_decay_lr(0.01, 0.0001, total_step=10, step_per_epoch=1, decay_epoch=3, power=1.0)
  153. group_params = [{'params': [all_params[0]]}, {'params': [all_params[1]]}]
  154. optimizer = AdaFactor(group_params, learning_rate=poly_decay_lr, relative_step=False)
  155. train_network = TrainOneStepCell(net_with_loss, optimizer)
  156. _executor.compile(train_network, inputs, label)
  157. def test_adafactor_group2():
  158. """ test_adafactor_group_lr_and_weight_decay """
  159. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  160. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  161. net = Net()
  162. net.set_train()
  163. loss = nn.SoftmaxCrossEntropyWithLogits()
  164. net_with_loss = WithLossCell(net, loss)
  165. all_params = net.trainable_params()
  166. schedule_lr = nn.PolynomialDecayLR(0.01, 0.0001, 3, power=1.0)
  167. group_params = [{'params': [all_params[0]]},
  168. {'params': [all_params[1]]}]
  169. optimizer = AdaFactor(group_params, learning_rate=schedule_lr, relative_step=False)
  170. train_network = TrainOneStepCell(net_with_loss, optimizer)
  171. _executor.compile(train_network, inputs, label)
  172. def test_adafactor_group3():
  173. """ test_adafactor_group_lr_and_weight_decay """
  174. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  175. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  176. net = Net()
  177. net.set_train()
  178. loss = nn.SoftmaxCrossEntropyWithLogits()
  179. net_with_loss = WithLossCell(net, loss)
  180. all_params = net.trainable_params()
  181. group_params = [{'params': [all_params[0]]}, {'params': [all_params[1]]}]
  182. optimizer = AdaFactor(group_params, learning_rate=None)
  183. train_network = TrainOneStepCell(net_with_loss, optimizer)
  184. _executor.compile(train_network, inputs, label)
  185. def test_adafactor_group4():
  186. """ test_adafactor_group_lr_and_weight_decay """
  187. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  188. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  189. net = Net()
  190. net.set_train()
  191. loss = nn.SoftmaxCrossEntropyWithLogits()
  192. net_with_loss = WithLossCell(net, loss)
  193. all_params = net.trainable_params()
  194. group_params = [{'params': [all_params[0]]},
  195. {'params': [all_params[1]]}]
  196. optimizer = AdaFactor(group_params, learning_rate=None)
  197. train_network = TrainOneStepCell(net_with_loss, optimizer)
  198. _executor.compile(train_network, inputs, label)
  199. def test_adafactor_group5():
  200. """ test_adafactor_group_lr_and_weight_decay """
  201. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  202. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  203. net = Net()
  204. net.set_train()
  205. loss = nn.SoftmaxCrossEntropyWithLogits()
  206. net_with_loss = WithLossCell(net, loss)
  207. all_params = net.trainable_params()
  208. group_params = [{'params': [all_params[0]]},
  209. {'params': [all_params[1]]}]
  210. optimizer = AdaFactor(group_params, learning_rate=None, beta1=0.1)
  211. train_network = TrainOneStepCell(net_with_loss, optimizer)
  212. _executor.compile(train_network, inputs, label)
  213. def test_adafactor_group6():
  214. """ test_adafactor_group_lr_and_weight_decay """
  215. inputs = Tensor(np.ones([1, 64]).astype(np.float32))
  216. label = Tensor(np.zeros([1, 10]).astype(np.float32))
  217. net = Net()
  218. net.set_train()
  219. loss = nn.SoftmaxCrossEntropyWithLogits()
  220. net_with_loss = WithLossCell(net, loss)
  221. all_params = net.trainable_params()
  222. group_params = [{'params': [all_params[0]]},
  223. {'params': [all_params[1]]}]
  224. optimizer = AdaFactor(group_params, learning_rate=None, beta1=0.2)
  225. train_network = TrainOneStepCell(net_with_loss, optimizer)
  226. _executor.compile(train_network, inputs, label)