
test_effect_optimizer.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizer operators with side effects.

Each case runs the operator once in graph mode on Ascend and checks that the
updated parameters differ from their initial values.
"""
import pytest
import numpy as np
import mindspore.nn as nn
from mindspore import context, Tensor
from mindspore.ops import operations as P
from mindspore.common import dtype as mstype
from mindspore.common.parameter import Parameter

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")


class AdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super(AdamNet, self).__init__()
        self.apply_adam = P.Adam()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
        self.apply_adam(self.var, self.m, self.v, beta1_power,
                        beta2_power, lr, beta1, beta2, epsilon, grad)
        return self.var, self.m, self.v


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adam():
    var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    net = AdamNet(var, m, v)
    beta1_power = Tensor(0.9, mstype.float32)
    beta2_power = Tensor(0.999, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.999, mstype.float32)
    epsilon = Tensor(1e-8, mstype.float32)
    grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
    new_var, new_m, new_v = net(
        beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"


class ApplyAdaMaxNet(nn.Cell):
    def __init__(self, val, m, v):
        super(ApplyAdaMaxNet, self).__init__()
        self.apply_ada_max = P.ApplyAdaMax()
        self.var = Parameter(val, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
        self.apply_ada_max(self.var, self.m, self.v,
                           beta1_power, lr, beta1, beta2, epsilon, grad)
        return self.var, self.m, self.v


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_ada_max():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    m = Tensor(np.random.rand(3, 3).astype(np.float32))
    v = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdaMaxNet(var, m, v)
    beta1_power = Tensor(0.9, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.99, mstype.float32)
    epsilon = Tensor(1e-10, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_m, new_v = net(beta1_power, lr, beta1, beta2, epsilon, grad)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"


class ApplyAdadeltaNet(nn.Cell):
    def __init__(self, var, accum, accum_update):
        super(ApplyAdadeltaNet, self).__init__()
        self.apply_adadelta = P.ApplyAdadelta()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.accum_update = Parameter(accum_update, name="accum_update")

    def construct(self, lr, rho, epsilon, grad):
        self.apply_adadelta(self.var, self.accum,
                            self.accum_update, lr, rho, epsilon, grad)
        return self.var, self.accum, self.accum_update


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adadelta():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum_update = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdadeltaNet(var, accum, accum_update)
    lr = Tensor(0.001, mstype.float32)
    rho = Tensor(0.0, mstype.float32)
    epsilon = Tensor(1e-6, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum, new_accum_update = net(lr, rho, epsilon, grad)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_accum_update != accum_update).any()), \
        "The results should be different!"


class ApplyAdagrad(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyAdagrad, self).__init__()
        self.apply_adagrad = P.ApplyAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, grad):
        self.apply_adagrad(self.var, self.accum, lr, grad)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdagrad(var, accum)
    lr = Tensor(0.001, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum = net(lr, grad)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyAdagradV2Net(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyAdagradV2Net, self).__init__()
        self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, grad):
        self.apply_adagrad_v2(self.var, self.accum, lr, grad)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adagrad_v2():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAdagradV2Net(var, accum)
    lr = Tensor(0.001, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum = net(lr, grad)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyAddSignNet(nn.Cell):
    def __init__(self, var, m):
        super(ApplyAddSignNet, self).__init__()
        self.apply_add_sign = P.ApplyAddSign()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")

    def construct(self, lr, alpha, sign_decay, beta, grad):
        self.apply_add_sign(self.var, self.m, lr, alpha,
                            sign_decay, beta, grad)
        return self.var, self.m


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_add_sign():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    m = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyAddSignNet(var, m)
    lr = Tensor(0.001, mstype.float32)
    alpha = Tensor(1.0, mstype.float32)
    sign_decay = Tensor(0.99, mstype.float32)
    beta = Tensor(0.9, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_m = net(lr, alpha, sign_decay, beta, grad)
    assert ((new_var != var).any() and (new_m != m).any()), \
        "The results should be different!"


class ApplyCenteredRMSPropNet(nn.Cell):
    def __init__(self, var):
        super(ApplyCenteredRMSPropNet, self).__init__()
        self.apply_centered_rms_prop = P.ApplyCenteredRMSProp()
        self.var = Parameter(var, name="var")

    def construct(self, mean_grad, mean_square, moment, grad, learning_rate):
        self.apply_centered_rms_prop(self.var, mean_grad, mean_square, moment, grad,
                                     learning_rate, 0.0, 1e-10, 0.05)
        return self.var


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_centered_rms_prop():
    var = Tensor(
        np.arange(-6, 6).astype(np.float32).reshape(2, 3, 2), mstype.float32)
    net = ApplyCenteredRMSPropNet(var)
    mean_grad = Tensor(np.arange(12).astype(
        np.float32).reshape(2, 3, 2), mstype.float32)
    mean_square = Tensor(
        np.arange(-8, 4).astype(np.float32).reshape(2, 3, 2), mstype.float32)
    moment = Tensor(np.arange(12).astype(
        np.float32).reshape(2, 3, 2), mstype.float32)
    grad = Tensor(np.arange(12).astype(
        np.float32).reshape(2, 3, 2), mstype.float32)
    learning_rate = Tensor(0.9, mstype.float32)
    new_var = net(mean_grad, mean_square, moment, grad, learning_rate)
    assert (new_var != var).any(), "The results should be different!"


class ApplyFtrlNet(nn.Cell):
    def __init__(self, var, accum, linear):
        super(ApplyFtrlNet, self).__init__()
        self.apply_ftrl = P.ApplyFtrl()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, lr, l1, l2, lr_power):
        self.apply_ftrl(self.var, self.accum, self.linear,
                        grad, lr, l1, l2, lr_power)
        return self.var, self.accum, self.linear


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_ftrl():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    linear = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyFtrlNet(var, accum, linear)
    grad = Tensor(np.random.randint(-4, 4, (3, 3)), mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    lr_power = Tensor(-0.5, mstype.float32)
    new_var, new_accum, new_linear = net(grad, lr, l1, l2, lr_power)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class ApplyGradientDescentNet(nn.Cell):
    def __init__(self, var):
        super(ApplyGradientDescentNet, self).__init__()
        self.apply_gradient_descent = P.ApplyGradientDescent()
        self.var = Parameter(var, name="var")

    def construct(self, alpha, delta):
        self.apply_gradient_descent(self.var, alpha, delta)
        return self.var


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_gradient_descent():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyGradientDescentNet(var)
    alpha = Tensor(0.001, mstype.float32)
    delta = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var = net(alpha, delta)
    assert (new_var != var).any(), "The results should be different!"


class ApplyMomentumNet(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyMomentumNet, self).__init__()
        self.apply_momentum = P.ApplyMomentum(gradient_scale=1024.0)
        self.var = Parameter(var, name='var')
        self.accum = Parameter(accum, name='accum')

    def construct(self, lr, grad, momentum):
        self.apply_momentum(self.var, self.accum, lr, grad, momentum)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_momentum():
    var = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
    accum = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
    net = ApplyMomentumNet(var, accum)
    lr = Tensor(np.random.normal(size=(1,)).astype(np.float32))
    grad = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
    momentum = Tensor(np.random.normal(size=(1,)).astype(np.float32))
    new_var, new_accum = net(lr, grad, momentum)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyPowerSignNet(nn.Cell):
    def __init__(self, var, m):
        super(ApplyPowerSignNet, self).__init__()
        self.apply_power_sign = P.ApplyPowerSign()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")

    def construct(self, lr, logbase, sign_decay, beta, grad):
        self.apply_power_sign(self.var, self.m, lr,
                              logbase, sign_decay, beta, grad)
        return self.var, self.m


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_power_sign():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    m = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyPowerSignNet(var, m)
    lr = Tensor(0.001, mstype.float32)
    logbase = Tensor(np.e, mstype.float32)
    sign_decay = Tensor(0.99, mstype.float32)
    beta = Tensor(0.9, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_m = net(lr, logbase, sign_decay, beta, grad)
    assert ((new_var != var).any() and (new_m != m).any()), \
        "The results should be different!"


class ApplyProximalAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(ApplyProximalAdagradNet, self).__init__()
        self.apply_proximal_adagrad = P.ApplyProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name='accum')

    def construct(self, lr, l1, l2, grad):
        self.apply_proximal_adagrad(self.var, self.accum, lr, l1, l2, grad)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_proximal_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyProximalAdagradNet(var, accum)
    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var, new_accum = net(lr, l1, l2, grad)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class ApplyProximalGradientDescentNet(nn.Cell):
    def __init__(self, var):
        super(ApplyProximalGradientDescentNet, self).__init__()
        self.apply_proximal_gradient_descent = P.ApplyProximalGradientDescent()
        self.var = Parameter(var, name="var")

    def construct(self, alpha, l1, l2, delta):
        self.apply_proximal_gradient_descent(self.var, alpha, l1, l2, delta)
        return self.var


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_proximal_gradient_descent():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyProximalGradientDescentNet(var)
    alpha = Tensor(0.001, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    delta = Tensor(np.random.rand(3, 3).astype(np.float32))
    new_var = net(alpha, l1, l2, delta)
    assert (new_var != var).any(), "The results should be different!"


class ApplyRMSPropNet(nn.Cell):
    def __init__(self, var):
        super(ApplyRMSPropNet, self).__init__()
        self.apply_rms_prop = P.ApplyRMSProp()
        self.var = Parameter(var, name="var")

    def construct(self, mean_square, moment, learning_rate, grad):
        self.apply_rms_prop(self.var, mean_square, moment,
                            learning_rate, grad, 0.0, 1e-10, 0.001)
        return self.var


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_rms_prop():
    var = Tensor(1., mstype.float32)
    net = ApplyRMSPropNet(var)
    mean_square = Tensor(2., mstype.float32)
    moment = Tensor(1., mstype.float32)
    learning_rate = Tensor(0.9, mstype.float32)
    grad = Tensor(2., mstype.float32)
    new_var = net(mean_square, moment, learning_rate, grad)
    assert (new_var != var).any(), "The results should be different!"


class FusedSparseAdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super(FusedSparseAdamNet, self).__init__()
        self.fused_sparse_adam = P.FusedSparseAdam()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
        self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
                               epsilon, grad, indices)
        return self.var, self.m, self.v


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_adam():
    var = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    m = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    v = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    net = FusedSparseAdamNet(var, m, v)
    beta1_power = Tensor(0.9, mstype.float32)
    beta2_power = Tensor(0.999, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.999, mstype.float32)
    epsilon = Tensor(1e-8, mstype.float32)
    gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
    indices = Tensor([0, 1], mstype.int32)
    new_var, new_m, new_v = net(
        beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"


class FusedSparseFtrlNet(nn.Cell):
    def __init__(self, var, accum, linear):
        super(FusedSparseFtrlNet, self).__init__()
        self.fused_sparse_ftrl = P.FusedSparseFtrl(
            lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, indices):
        self.fused_sparse_ftrl(self.var, self.accum,
                               self.linear, grad, indices)
        return self.var, self.accum, self.linear


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_ftrl():
    var = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    accum = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    linear = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    net = FusedSparseFtrlNet(var, accum, linear)
    grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    new_var, new_accum, new_linear = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class FusedSparseLazyAdamNet(nn.Cell):
    def __init__(self, var, m, v):
        super(FusedSparseLazyAdamNet, self).__init__()
        self.fused_sparse_lazyadam = P.FusedSparseLazyAdam()
        self.var = Parameter(var, name="var")
        self.m = Parameter(m, name="m")
        self.v = Parameter(v, name="v")

    def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
        self.fused_sparse_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1,
                                   beta2, epsilon, grad, indices)
        return self.var, self.m, self.v


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_lazyadam():
    var = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    m = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    v = Tensor(np.ones([3, 1, 2]).astype(np.float32))
    net = FusedSparseLazyAdamNet(var, m, v)
    beta1_power = Tensor(0.9, mstype.float32)
    beta2_power = Tensor(0.999, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.999, mstype.float32)
    epsilon = Tensor(1e-8, mstype.float32)
    gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
    indices = Tensor([0, 1], mstype.int32)
    new_var, new_m, new_v = net(
        beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
    assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
        "The results should be different!"


class FusedSparseProximalAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(FusedSparseProximalAdagradNet, self).__init__()
        self.fused_sparse_proximal_adagrad = P.FusedSparseProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, l1, l2, grad, indices):
        self.fused_sparse_proximal_adagrad(
            self.var, self.accum, lr, l1, l2, grad, indices)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_proximal_adagrad():
    var = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    accum = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
    net = FusedSparseProximalAdagradNet(var, accum)
    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
    indices = Tensor(np.array([0, 1]).astype(np.int32))
    new_var, new_accum = net(lr, l1, l2, grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class SparseApplyAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(SparseApplyAdagradNet, self).__init__()
        self.sparse_apply_adagrad = P.SparseApplyAdagrad(lr=0.01)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, grad, indices):
        self.sparse_apply_adagrad(self.var, self.accum, grad, indices)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyAdagradNet(var, accum)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, _ = net(grad, indices)
    # new_accum is equal to accum.
    assert (new_var != var).any(), "The results should be different!"


class SparseApplyAdagradV2Net(nn.Cell):
    def __init__(self, var, accum):
        super(SparseApplyAdagradV2Net, self).__init__()
        self.sparse_apply_adagrad_v2 = P.SparseApplyAdagradV2(
            lr=0.01, epsilon=0.001)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, grad, indices):
        self.sparse_apply_adagrad_v2(self.var, self.accum, grad, indices)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_adagrad_v2():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyAdagradV2Net(var, accum)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class SparseApplyFtrlNet(nn.Cell):
    def __init__(self, var, accum, linear):
        super(SparseApplyFtrlNet, self).__init__()
        self.sparse_apply_ftrl = P.SparseApplyFtrl(
            lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, indices):
        self.sparse_apply_ftrl(self.var, self.accum,
                               self.linear, grad, indices)
        return self.var, self.accum, self.linear


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_ftrl():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    linear = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyFtrlNet(var, accum, linear)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum, new_linear = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class SparseApplyFtrlV2Net(nn.Cell):
    def __init__(self, var, accum, linear):
        super(SparseApplyFtrlV2Net, self).__init__()
        self.sparse_apply_ftrl_v2 = P.SparseApplyFtrlV2(
            lr=0.01, l1=0.0, l2=0.0, l2_shrinkage=0.0, lr_power=-0.5)
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.linear = Parameter(linear, name="linear")

    def construct(self, grad, indices):
        self.sparse_apply_ftrl_v2(
            self.var, self.accum, self.linear, grad, indices)
        return self.var, self.accum, self.linear


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_ftrl_v2():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    linear = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyFtrlV2Net(var, accum, linear)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum, new_linear = net(grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
        "The results should be different!"


class SparseApplyProximalAdagradNet(nn.Cell):
    def __init__(self, var, accum):
        super(SparseApplyProximalAdagradNet, self).__init__()
        self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")

    def construct(self, lr, l1, l2, grad, indices):
        self.sparse_apply_proximal_adagrad(
            self.var, self.accum, lr, l1, l2, grad, indices)
        return self.var, self.accum


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_proximal_adagrad():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = SparseApplyProximalAdagradNet(var, accum)
    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.0, mstype.float32)
    l2 = Tensor(0.0, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    new_var, new_accum = net(lr, l1, l2, grad, indices)
    assert ((new_var != var).any() and (new_accum != accum).any()), \
        "The results should be different!"


class SGDNet(nn.Cell):
    def __init__(self, var):
        super(SGDNet, self).__init__()
        self.sgd = P.SGD()
        self.var = Parameter(var, name="var")

    def construct(self, gradient, learning_rate, accum, momentum, stat):
        self.sgd(self.var, gradient, learning_rate, accum, momentum, stat)
        return self.var


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sgd():
    var = Tensor(np.array([2, -0.5, 1.7, 4]), mstype.float32)
    net = SGDNet(var)
    gradient = Tensor(np.array([1, -1, 0.5, 2]), mstype.float32)
    learning_rate = Tensor(0.01, mstype.float32)
    accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mstype.float32)
    momentum = Tensor(0.1, mstype.float32)
    stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mstype.float32)
    new_var = net(gradient, learning_rate, accum, momentum, stat)
    assert (new_var != var).any(), "The results should be different!"


class ApplyProximalAdagradConstantNet(nn.Cell):
    def __init__(self, var, accum):
        super().__init__()
        self.depend = P.Depend()
        self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
        self.var = Parameter(var, name="var")
        self.accum = Parameter(accum, name="accum")
        self.const = Tensor(9999, mstype.float32)

    def construct(self, lr, l1, l2, grad, indices):
        optimizer = self.sparse_apply_proximal_adagrad(
            self.var, self.accum, lr, l1, l2, grad, indices)
        return self.depend(self.const, optimizer)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_proximal_adagrad_constant():
    var = Tensor(np.random.rand(3, 3).astype(np.float32))
    accum = Tensor(np.random.rand(3, 3).astype(np.float32))
    net = ApplyProximalAdagradConstantNet(var, accum)
    lr = Tensor(0.01, mstype.float32)
    l1 = Tensor(0.1, mstype.float32)
    l2 = Tensor(0.2, mstype.float32)
    grad = Tensor(np.random.rand(3, 3).astype(np.float32))
    indices = Tensor(np.ones((3,), np.int32))
    net(lr, l1, l2, grad, indices)
    assert (net.parameters_dict()['var'].data != var).any()
    assert (net.parameters_dict()['accum'].data != accum).any()


class MulSGDNet(nn.Cell):
    def __init__(self, var):
        super().__init__()
        self.sgd = P.SGD()
        self.var = Parameter(var, name="var")
        self.mul = P.Mul()

    def construct(self, gradient, learning_rate, accum, momentum, stat):
        out = self.mul(self.var, self.var)
        self.sgd(self.var, gradient, learning_rate, accum, momentum, stat)
        return out


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_mul_sgd():
    var = Tensor(np.array([2, -0.5, 1.7, 4]), mstype.float32)
    net = MulSGDNet(var)
    gradient = Tensor(np.array([1, -1, 0.5, 2]), mstype.float32)
    learning_rate = Tensor(0.01, mstype.float32)
    accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mstype.float32)
    momentum = Tensor(0.1, mstype.float32)
    stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mstype.float32)
    net(gradient, learning_rate, accum, momentum, stat)
    assert (net.parameters_dict()['var'].data != var).any()