You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

test_auto_monad_gpu.py 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import os
  16. import re
  17. import subprocess
  18. import pytest
  19. import numpy as np
  20. import mindspore as ms
  21. import mindspore.ops.operations as P
  22. from mindspore.nn import Cell
  23. from mindspore.nn import ReLU, BatchNorm2d, Conv2d, ParameterUpdate
  24. from mindspore.nn import Momentum
  25. from mindspore.nn import SoftmaxCrossEntropyWithLogits
  26. from mindspore import amp
  27. from mindspore import context, Tensor
  28. from mindspore.common import ParameterTuple
  29. from mindspore.common.parameter import Parameter
  30. from mindspore.ops.composite import GradOperation
# Module-wide default for every test below: graph mode on the GPU device target.
# Individual tests call context.set_context again to change the mode.
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
class _Grad(Cell):
    """Cell wrapper that applies a gradient operation to ``network``.

    Args:
        grad: Gradient operation object. It must expose a ``sens_param``
            attribute and be callable as ``grad(network)`` or
            ``grad(network, params)``.
        network: The cell to differentiate.
        wrt_params: When true, the network's trainable parameters are
            collected and handed to ``grad`` as a second argument.
        real_inputs_count: Number of leading positional inputs that are the
            real network inputs; the remaining inputs are passed on as one
            tuple of sensitivity values. ``None`` forwards all inputs as-is.
    """

    def __init__(self, grad, network, wrt_params=False, real_inputs_count=None):
        super().__init__()
        self.network = network
        self.grad = grad
        self.sens_param = self.grad.sens_param
        self.wrt_params = wrt_params
        self.real_inputs_count = real_inputs_count
        # self.params is only created when gradients w.r.t. parameters are requested.
        if self.wrt_params:
            self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        """Return the result of the gradient function applied to ``inputs``."""
        # No split requested (or no sens input expected): forward everything unchanged.
        if self.real_inputs_count is None or self.sens_param is False:
            if self.wrt_params:
                return self.grad(self.network, self.params)(*inputs)
            return self.grad(self.network)(*inputs)
        # Split the positional inputs into real inputs and trailing sens values;
        # the sens values are passed on as a single tuple argument.
        real_inputs = inputs[:self.real_inputs_count]
        sense_param_inputs = inputs[self.real_inputs_count:]
        if self.wrt_params:
            return self.grad(self.network, self.params)(*real_inputs, sense_param_inputs)
        return self.grad(self.network)(*real_inputs, sense_param_inputs)
  52. class GradOfAllInputs(_Grad):
  53. '''
  54. get grads of all inputs
  55. '''
  56. def __init__(self, network, sens_param=True, real_inputs_count=None):
  57. super().__init__(grad=GradOperation(get_all=True, sens_param=sens_param),
  58. network=network, real_inputs_count=real_inputs_count)
  59. class GradOfAllInputsAndParams(_Grad):
  60. '''
  61. get grads of all inputs and params
  62. '''
  63. def __init__(self, network, sens_param=True, real_inputs_count=None):
  64. super().__init__(grad=GradOperation(get_all=True, get_by_list=True, sens_param=sens_param),
  65. network=network, wrt_params=True, real_inputs_count=real_inputs_count)
  66. def _count_unequal_element(data_expected, data_me, rtol, atol):
  67. assert data_expected.shape == data_me.shape
  68. total_count = len(data_expected.flatten())
  69. error = np.abs(data_expected - data_me)
  70. greater = np.greater(error, atol + np.abs(data_me)*rtol)
  71. loss_count = np.count_nonzero(greater)
  72. assert (loss_count/total_count) < rtol, \
  73. "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".\
  74. format(data_expected[greater], data_me[greater], error[greater])
  75. def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
  76. if np.any(np.isnan(data_expected)):
  77. assert np.allclose(data_expected, data_me, rtol,
  78. atol, equal_nan=equal_nan)
  79. elif not np.allclose(data_expected, data_me, rtol, atol, equal_nan=equal_nan):
  80. _count_unequal_element(data_expected, data_me, rtol, atol)
  81. else:
  82. assert True
  83. def clear_files():
  84. os.system("rm verbose_ir_files/*")
  85. def find_files(file, para):
  86. output = subprocess.check_output(
  87. ["grep '%s' verbose_ir_files/%s | wc -l" % (para, file)],
  88. shell=True)
  89. out = str(output, 'utf-8').strip()
  90. return out
class SideEffectCastAll(Cell):
    """Assigns two inputs to two parameters, then casts both parameters to float16."""

    def __init__(self):
        super().__init__()
        self.cast = P.Cast()
        self.dtype = ms.float16
        # Seeds NumPy's global RNG, so the initial parameter values are repeatable
        # (this also affects later np.random calls made by the caller).
        np.random.seed(5)
        inputs1 = np.random.randn(5, 5)
        inputs2 = np.random.randn(5, 5)
        self.parameter_a = Parameter(Tensor(inputs1, ms.float32), name="a")
        self.parameter_b = Parameter(Tensor(inputs2, ms.float32), name="b")
        self.assign = P.Assign()

    def construct(self, x, y):
        """Overwrite both parameters with ``x`` and ``y`` and return their casts."""
        # The assigns come first; the casts below read the parameters afterwards.
        self.assign(self.parameter_a, x)
        self.assign(self.parameter_b, y)
        out_a = self.cast(self.parameter_a, self.dtype)
        out_b = self.cast(self.parameter_b, self.dtype)
        return out_a, out_b
  108. def test_side_effect_castall():
  109. clear_files()
  110. context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
  111. net = SideEffectCastAll()
  112. inputs1 = np.random.randn(5, 5)
  113. inputs2 = np.random.randn(5, 5)
  114. net(Tensor(inputs1, ms.float32), Tensor(inputs2, ms.float32))
  115. result = find_files('hwopt*cast_all*.ir', 'CastAll')
  116. assert result == '2'
class SideEffectControlFlowAssignDependWhileNet(Cell):
    """Network whose while-loop condition reads a parameter updated inside the loop."""

    def __init__(self):
        super().__init__()
        self.parameter1 = Parameter(
            Tensor([199.0], ms.float32), name="parameter1")
        self.assign = P.Assign()
        self.assignadd = P.AssignAdd()
        self.addn = P.AddN()

    def construct(self, x, y, z):
        """Set parameter1 to ``x``, then double ``x`` while parameter1 < ``y``."""
        self.assign(self.parameter1, x)
        # Each iteration doubles x and advances the loop counter parameter by z.
        while self.parameter1 < y:
            x = self.addn((x, x))
            self.assignadd(self.parameter1, z)
        return x

    def grad_mindspore_impl(self, params1, params2, params3, grad_ys):
        """Run GradOfAllInputsAndParams on this net with the given inputs and ``grad_ys``."""
        grad_net = GradOfAllInputsAndParams(self)
        grad_net.set_train()
        grad_out = grad_net(params1, params2, params3, grad_ys)
        return grad_out
  136. @pytest.mark.level0
  137. @pytest.mark.platform_x86_gpu_training
  138. @pytest.mark.env_onecard
  139. def test_side_effect_control_flow_assign_depend_while_net():
  140. net = SideEffectControlFlowAssignDependWhileNet()
  141. context.set_context(mode=context.GRAPH_MODE)
  142. out1 = net(Tensor([9.0], ms.float32), Tensor(
  143. [99.0], ms.float32), Tensor([1.0], ms.float32))
  144. net = SideEffectControlFlowAssignDependWhileNet()
  145. context.set_context(mode=context.PYNATIVE_MODE)
  146. out2 = net(Tensor([9.0], ms.float32), Tensor(
  147. [99.0], ms.float32), Tensor([1.0], ms.float32))
  148. allclose_nparray(out1.asnumpy(), out2.asnumpy(), 0.001, 0.001)
  149. class Addn(Cell):
  150. def __init__(self):
  151. super().__init__()
  152. self.parameter3 = Parameter(Tensor([1.0], ms.float32),
  153. name="parameter3")
  154. self.parameter4 = Parameter(Tensor([3.0], ms.float32),
  155. name="parameter4")
  156. self.addn = P.AddN()
  157. def construct(self, inputs):
  158. out = self.addn((inputs, self.parameter3, self.parameter4))
  159. return out
  160. class Relu(Cell):
  161. def __init__(self):
  162. super().__init__()
  163. self.relu = P.ReLU()
  164. def construct(self, inputs):
  165. out = self.relu(inputs)
  166. return out
class SideEffectTwoAssignTwoAddnDependencyNet(Cell):
    """Network interleaving two Assign calls with two AddN calls that read the assigned parameters."""

    def __init__(self):
        super().__init__()
        self.parameter1 = Parameter(Tensor([1.0], ms.float32),
                                    name="parameter1")
        self.parameter2 = Parameter(Tensor([3.0], ms.float32),
                                    name="parameter2")
        self.assign = P.Assign()
        self.addN = P.AddN()

    def construct(self, inputs):
        """Assign, add, assign again, add again; statement order is significant."""
        # First AddN runs after parameter1 is assigned but before parameter2 is.
        self.assign(self.parameter1, inputs)
        out = self.addN((inputs, self.parameter1, self.parameter2))
        # Second AddN runs after both parameters have been assigned.
        self.assign(self.parameter2, inputs)
        out = self.addN((out, self.parameter1, self.parameter2))
        return out

    def grad_mindspore_impl(self, params, grad_ys):
        """Run GradOfAllInputsAndParams on this net with ``params`` and ``grad_ys``."""
        grad_net = GradOfAllInputsAndParams(self)
        grad_net.set_train()
        grad_out = grad_net(params, grad_ys)
        return grad_out
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_ctrl_while_by_while_and_if_in_first_while():
    """Run a net with two consecutive while loops, the first containing an if.

    The test only executes the network in graph mode; it makes no assertion
    on the output.
    """
    class Net(Cell):
        def __init__(self):
            super().__init__()
            self.relu = P.ReLU()
            self.sigmoid = P.Sigmoid()
            self.tanh = P.Tanh()
            self.add = P.Add()
            a = np.full((1,), 5, dtype=np.float32)
            self.a = Parameter(Tensor(a), name="a")
            b = np.full((1,), 4, dtype=np.float32)
            self.b = Parameter(Tensor(b), name="b")
            c = np.full((1,), 7, dtype=np.float32)
            self.c = Parameter(Tensor(c), name="c")

        def construct(self, x):
            out = x
            # First loop: parameter a is the counter, updated in place.
            while self.a < 7:
                if self.a < self.c:
                    out = self.relu(x)
                self.a += 1
            # Second loop: parameter c is the counter, updated in place.
            while self.c > 5:
                out = self.add(out, out)
                self.c -= 1
            return out

    context.set_context(mode=context.GRAPH_MODE)
    input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me_a = Tensor(input_np_a)
    net = Net()
    net(input_me_a)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_ctrl_while_by_while_and_while_in_first_while():
    """Run a net with two consecutive while loops, the first containing a nested while.

    The test only executes the network in graph mode; it makes no assertion
    on the output.
    """
    class Net(Cell):
        def __init__(self):
            super().__init__()
            self.relu = P.ReLU()
            self.sigmoid = P.Sigmoid()
            self.tanh = P.Tanh()
            self.add = P.Add()
            a = np.full((1,), 5, dtype=np.float32)
            self.a = Parameter(Tensor(a), name="a")
            b = np.full((1,), 4, dtype=np.float32)
            self.b = Parameter(Tensor(b), name="b")
            c = np.full((1,), 7, dtype=np.float32)
            self.c = Parameter(Tensor(c), name="c")

        def construct(self, x):
            out = x
            # Outer loop: parameter a is the counter, updated in place.
            while self.a < self.c:
                out = self.relu(x)
                # Nested loop only decrements parameter b.
                while self.b > 1:
                    self.b -= 1
                self.a += 1
            # Second top-level loop: parameter c is the counter.
            while self.c > 5:
                out = self.add(out, out)
                self.c -= 1
            return out

    context.set_context(mode=context.GRAPH_MODE)
    input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me_a = Tensor(input_np_a)
    net = Net()
    net(input_me_a)
class InplaceNet(Cell):
    """Two conv+batchnorm branches summed, ReLU, then two conv branches summed.

    All Conv2d and BatchNorm2d layers use 4 channels and the NHWC data format.
    """

    def __init__(self):
        super().__init__()
        self.bn1 = BatchNorm2d(num_features=4, eps=1e-4,
                               momentum=0.9, gamma_init=1, beta_init=0,
                               moving_mean_init=0, moving_var_init=1, data_format="NHWC")
        self.bn2 = BatchNorm2d(num_features=4, eps=1e-4,
                               momentum=0.9, gamma_init=1, beta_init=0,
                               moving_mean_init=0, moving_var_init=1, data_format="NHWC")
        self.add = P.Add()
        self.relu = ReLU()
        self.conv2d1 = Conv2d(in_channels=4, out_channels=4,
                              kernel_size=2, data_format="NHWC")
        self.conv2d2 = Conv2d(in_channels=4, out_channels=4,
                              kernel_size=2, data_format="NHWC")
        self.conv2d3 = Conv2d(in_channels=4, out_channels=4,
                              kernel_size=2, data_format="NHWC")
        self.conv2d4 = Conv2d(in_channels=4, out_channels=4,
                              kernel_size=2, data_format="NHWC")

    def construct(self, input_x):
        """Return conv3(w) + conv4(w), where w = relu(bn1(conv1(x)) + bn2(conv2(x)))."""
        # First stage: both branches read the same input.
        tmp_c1 = self.conv2d1(input_x)
        tmp_c2 = self.conv2d2(input_x)
        tmp_x = self.bn1(tmp_c1)
        tmp_y = self.bn2(tmp_c2)
        tmp_w = self.add(tmp_x, tmp_y)
        tmp_w = self.relu(tmp_w)
        # Second stage: both convolutions read the same activation.
        tmp_c1 = self.conv2d3(tmp_w)
        tmp_c2 = self.conv2d4(tmp_w)
        output = self.add(tmp_c1, tmp_c2)
        return output
  282. def test_ir_fusion_inplace_bn_conv_conv():
  283. clear_files()
  284. context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
  285. input_np = np.random.uniform(0.0, 255.0,
  286. size=[4, 4, 4, 4]).astype(np.float32)
  287. label = np.ones([4, 4, 4, 4]).astype(np.float32)
  288. net = InplaceNet()
  289. loss = SoftmaxCrossEntropyWithLogits(sparse=False)
  290. opt = Momentum(learning_rate=0.01, momentum=0.9,
  291. params=filter(lambda x: x.requires_grad, net.get_parameters()))
  292. net = amp.build_train_network(net, opt, loss, level="O2",
  293. keep_batchnorm_fp32=False)
  294. net.set_train()
  295. net(Tensor(input_np), Tensor(label))
  296. find_accum = find_files("hwopt*cudnn_inplace*ir",
  297. "inplace_algo: accumulation")
  298. find_cover = find_files("hwopt*cudnn_inplace*ir",
  299. "inplace_algo: cover")
  300. assert find_accum == '1'
  301. assert find_cover == '1'
  302. def clean_all_ir_files(folder_path):
  303. if os.path.exists(folder_path):
  304. for file_name in os.listdir(folder_path):
  305. if file_name.endswith('.ir') or file_name.endswith('.dot') or \
  306. file_name.endswith('.dat'):
  307. os.remove(os.path.join(folder_path, file_name))
  308. def find_newest_validateir_file(folder_path):
  309. ckpt_files = map(lambda f: os.path.join(folder_path, f),
  310. filter(lambda f: re.match(r'\d+_validate_\d+.ir', f),
  311. os.listdir(folder_path)))
  312. return max(ckpt_files, key=os.path.getctime)
  313. def read_file():
  314. filename = find_newest_validateir_file('./')
  315. with open((os.path.join(filename)), 'r') as f:
  316. content = f.read()
  317. clean_all_ir_files('./')
  318. return content
  319. class Add(Cell):
  320. def __init__(self):
  321. super().__init__()
  322. self.add = P.Add()
  323. def construct(self, x, y):
  324. return self.add(x, y)
class MixControlNet(Cell):
    """Network mixing while/if control flow with parameter side effects.

    Args:
        in_channel: Channel count used for the Conv2d and BatchNorm2d layers.
        x: Initial value of the loop counters used in ``construct``.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting in ``construct`` (in particular which ``if`` the
    ``else`` belongs to) is a reconstruction — confirm against upstream.
    """

    def __init__(self, in_channel, x):
        super().__init__()
        #self._save_graphs(save_graph_flag=True, save_graph_path=".")
        self.biasadd = P.BiasAdd()
        self.equal = P.Equal()
        self.addn = P.AddN()
        self.conv = Conv2d(in_channels=in_channel, out_channels=in_channel,
                           kernel_size=1, stride=1, has_bias=False,
                           weight_init='ones', pad_mode='same')
        self.bn = BatchNorm2d(num_features=in_channel)
        self.controldepend = P.ControlDepend()
        self.assignadd = P.AssignAdd()
        self.assign = P.Assign()
        self.relu = ReLU()
        self.mean = P.ReduceMean(keep_dims=False)
        # The bias parameters have a hard-coded length of 3, independent of in_channel.
        self.bias = Parameter(
            Tensor(np.random.randint(2, size=(3,)).astype((np.float32))),
            name="bias")
        self.bias2 = Parameter(Tensor(np.ones([3]).astype(np.float32)),
                               name="bias2")
        self.parameterupdate = ParameterUpdate(self.bias)
        self.value = Tensor(np.random.randn(*(3,)), ms.float32)
        self.x = x

    def construct(self, input_x):
        """Apply the control-flow graph to ``input_x`` and reduce over axes (2, 3)."""
        # x drives the outer loop, z the inner one; both start at self.x.
        x = self.x
        z = self.x
        out = self.biasadd(input_x, self.bias)
        while x < 20:
            update = self.parameterupdate(self.bias2)
            out = self.biasadd(out, update)
            if x < 10:
                out = self.addn((input_x, out))
                while z < 20:
                    out = self.conv(out)
                    z = z + 1
            if x < 20:
                out = self.biasadd(out, self.bias)
                if x % 2 == 0:
                    out = self.biasadd(out, self.bias)
                    # The bias update is tied to `out` through ControlDepend.
                    assign = self.assignadd(self.bias, self.value)
                    self.controldepend(assign, out)
                    out = self.bn(out)
                else:
                    out = self.conv(out)
            x = x + 1
        out = self.addn((out, out))
        out = self.mean(out, (2, 3))
        return out
  374. def use_build_train_network_controlflow_check_cast_num(network, level, input_x,
  375. label, cast_num,
  376. sparse=False,
  377. loss_flag=True,
  378. **kwargs):
  379. opt = Momentum(learning_rate=0.0001, momentum=0.009,
  380. params=network.trainable_params())
  381. loss = None
  382. if loss_flag:
  383. loss = SoftmaxCrossEntropyWithLogits(sparse=sparse, reduction='mean')
  384. train_network = ms.amp.build_train_network(network, opt, loss, level=level,
  385. **kwargs)
  386. out_me = train_network(input_x, label)
  387. if context.get_context("mode") == 0:
  388. content = read_file()
  389. castnum = re.findall('Cast', content)
  390. assert len(castnum) == cast_num
  391. return out_me
  392. def test_auto_mixed_precision_controlflow_auto():
  393. context.set_context(mode=context.PYNATIVE_MODE, save_graphs=True)
  394. net = MixControlNet(3, 5)
  395. input_x = Tensor(
  396. np.random.randint(2, size=(1, 3, 2, 2)).astype((np.float32)))
  397. label = Tensor(np.zeros([1, 3]).astype(np.float32))
  398. if ms.context.get_context("device_target") == "Ascend":
  399. cast_num = 77
  400. if ms.context.get_context("device_target") == "GPU":
  401. cast_num = 73
  402. use_build_train_network_controlflow_check_cast_num(net, "auto", input_x,
  403. label, cast_num)
def test_updatestate_between_assigns():
    """Two consecutive parameter assignments should yield one 'UpdateState' in the IR.

    The net output is also checked against the second assigned value. The IR
    check runs only when the context mode equals 0.
    """
    class UpdateState_Assigns(Cell):
        def __init__(self):
            super().__init__()
            self.para1 = Parameter(Tensor(1, dtype=ms.int32), name='para1')
            self.para2 = Parameter(Tensor(3, dtype=ms.int32), name='para2')

        def construct(self, value1, value2):
            # Two separate assignment statements, one per parameter.
            self.para1 = value1
            self.para2 = value2
            return self.para2

    context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
    input_x = Tensor(10, dtype=ms.int32)
    input_y = Tensor(30, dtype=ms.int32)
    expect = Tensor(30, dtype=ms.int32)
    net = UpdateState_Assigns()
    out = net(input_x, input_y)
    np.testing.assert_array_equal(out.asnumpy(), expect.asnumpy())
    if ms.context.get_context('mode') == 0:
        # Count 'UpdateState' occurrences in the newest validate IR dump.
        content = read_file()
        updatestate_num = re.findall('UpdateState', content)
        assert len(updatestate_num) == 1
def test_updatestate_between_maketuple_assign():
    """A tuple assignment followed by a single assignment should yield one 'UpdateState'.

    The net output is also checked against the last assigned value. The IR
    check runs only when the context mode equals 0.
    """
    class UpdateState_MakeTuple_Assign(Cell):
        def __init__(self):
            super().__init__()
            self.para1 = Parameter(Tensor(1, dtype=ms.int32), name='para1')
            self.para2 = Parameter(Tensor(3, dtype=ms.int32), name='para2')
            self.para3 = Parameter(Tensor(5, dtype=ms.int32), name='para3')

        def construct(self, value1, value2, value3):
            # Tuple assignment to two parameters first, then a plain assignment.
            (self.para1, self.para2) = (value1, value2)
            self.para3 = value3
            return self.para3

    context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
    input_x = Tensor(10, dtype=ms.int32)
    input_y = Tensor(30, dtype=ms.int32)
    input_z = Tensor(50, dtype=ms.int32)
    expect = Tensor(50, dtype=ms.int32)
    net = UpdateState_MakeTuple_Assign()
    out = net(input_x, input_y, input_z)
    np.testing.assert_array_equal(out.asnumpy(), expect.asnumpy())
    if ms.context.get_context('mode') == 0:
        # Count 'UpdateState' occurrences in the newest validate IR dump.
        content = read_file()
        updatestate_num = re.findall('UpdateState', content)
        assert len(updatestate_num) == 1
def test_updatestate_between_assign_maketuple():
    """A single assignment followed by a tuple assignment should yield one 'UpdateState'.

    The net output is also checked against the last assigned value. The IR
    check runs only when the context mode equals 0.
    """
    class UpdateState_Assign_MakeTuple(Cell):
        def __init__(self):
            super().__init__()
            self.para1 = Parameter(Tensor(1, dtype=ms.int32), name='para1')
            self.para2 = Parameter(Tensor(3, dtype=ms.int32), name='para2')
            self.para3 = Parameter(Tensor(5, dtype=ms.int32), name='para3')

        def construct(self, value1, value2, value3):
            # Plain assignment first, then a tuple assignment to two parameters.
            self.para1 = value1
            (self.para2, self.para3) = (value2, value3)
            return self.para3

    context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
    input_x = Tensor(10, dtype=ms.int32)
    input_y = Tensor(30, dtype=ms.int32)
    input_z = Tensor(50, dtype=ms.int32)
    expect = Tensor(50, dtype=ms.int32)
    net = UpdateState_Assign_MakeTuple()
    out = net(input_x, input_y, input_z)
    np.testing.assert_array_equal(out.asnumpy(), expect.asnumpy())
    if ms.context.get_context('mode') == 0:
        # Count 'UpdateState' occurrences in the newest validate IR dump.
        content = read_file()
        updatestate_num = re.findall('UpdateState', content)
        assert len(updatestate_num) == 1