You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_data_dump.py 21 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. # Copyright 2020-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. import os
  16. import sys
  17. import tempfile
  18. import time
  19. import shutil
  20. import glob
  21. import csv
  22. from importlib import import_module
  23. from pathlib import Path
  24. import numpy as np
  25. import pytest
  26. import mindspore.context as context
  27. import mindspore.nn as nn
  28. from mindspore import Tensor
  29. from mindspore.ops import operations as P
  30. from mindspore.nn import Cell
  31. from mindspore.nn import Dense
  32. from mindspore.nn import SoftmaxCrossEntropyWithLogits
  33. from mindspore.nn import Momentum
  34. from mindspore.nn import TrainOneStepCell
  35. from mindspore.nn import WithLossCell
  36. from dump_test_utils import generate_dump_json, generate_dump_json_with_overflow, \
  37. generate_statistic_dump_json, check_dump_structure, find_nth_pos
  38. from tests.security_utils import security_off_wrap
class Net(nn.Cell):
    """Minimal two-input network (element-wise Add); the dump target for most tests here."""

    def __init__(self):
        super(Net, self).__init__()
        self.add = P.Add()

    def construct(self, x_, y_):
        # Element-wise sum of the two input tensors.
        return self.add(x_, y_)
# Shared module-level inputs for the Add dump tests.
# Their element-wise sum is [[8, 10, 12], [14, 16, 18]], which the checks below assert.
x = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32)
def run_async_dump(test_name):
    """Run one Add step on Ascend with async dump enabled and check one dump file appears.

    Args:
        test_name (str): Key passed to generate_dump_json to select the dump configuration.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'async_dump')
        dump_config_path = os.path.join(tmp_dir, 'async_dump.json')
        generate_dump_json(dump_path, dump_config_path, test_name)
        # MindSpore reads the dump configuration from this env var at graph compile time.
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        # Layout: <dump_path>/rank_0/Net/<graph_id>/<iteration> — presumably graph 0, iteration 0.
        dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
        if os.path.isdir(dump_path):
            shutil.rmtree(dump_path)
        add = Net()
        add(Tensor(x), Tensor(y))
        # Dumping is asynchronous: poll up to 3 times, 2 s apart, for the directory to appear.
        for _ in range(3):
            if not os.path.exists(dump_file_path):
                time.sleep(2)
        check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
        # Exactly one dump file is expected for the single Add operation.
        assert len(os.listdir(dump_file_path)) == 1
        del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump():
    """
    Feature: async dump on Ascend
    Description: test async dump with default file_format value
    Expectation: dump data are generated as protobuf file format (suffix with timestamp)
    """
    # Delegates to the shared runner; the name selects the matching dump config.
    run_async_dump("test_async_dump")
# Temporarily disabled pending an Ascend run-package update.
@pytest.mark.skip(reason="wait for run package updates in Dec 01")
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_file_format():
    """
    Feature: async dump on Ascend in npy format
    Description: test async dump with file_format is configured as npy
    Expectation: dump data are generated as npy file format
    """
    run_async_dump("test_async_dump_file_format")
  90. def run_e2e_dump():
  91. if sys.platform != 'linux':
  92. return
  93. with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
  94. dump_path = os.path.join(tmp_dir, 'e2e_dump')
  95. dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json')
  96. generate_dump_json(dump_path, dump_config_path, 'test_e2e_dump')
  97. os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
  98. dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
  99. if os.path.isdir(dump_path):
  100. shutil.rmtree(dump_path)
  101. add = Net()
  102. add(Tensor(x), Tensor(y))
  103. if context.get_context("device_target") == "Ascend":
  104. assert len(os.listdir(dump_file_path)) == 5
  105. output_name = "Add.Add-op*.0.0.*.output.0.DefaultFormat.npy"
  106. elif context.get_context("device_target") == "CPU":
  107. assert len(os.listdir(dump_file_path)) == 5
  108. output_name = "Add.Add-op*.0.0.*.output.0.DefaultFormat.npy"
  109. else:
  110. assert len(os.listdir(dump_file_path)) == 3
  111. output_name = "Add.Add-op*.0.0.*.output.0.DefaultFormat.npy"
  112. output_path = glob.glob(os.path.join(dump_file_path, output_name))[0]
  113. real_path = os.path.realpath(output_path)
  114. output = np.load(real_path)
  115. expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
  116. assert output.dtype == expect.dtype
  117. assert np.array_equal(output, expect)
  118. for _ in range(3):
  119. if not os.path.exists(dump_file_path):
  120. time.sleep(2)
  121. check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
  122. del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_e2e_dump():
    """
    Feature: e2e dump on Ascend
    Description: run the shared e2e dump check for a simple Add network
    Expectation: dump files are generated and contain the expected output tensor
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    run_e2e_dump()
  131. @pytest.mark.level0
  132. @pytest.mark.platform_arm_ascend_training
  133. @pytest.mark.platform_x86_ascend_training
  134. @pytest.mark.env_onecard
  135. @security_off_wrap
  136. def test_e2e_dump_with_hccl_env():
  137. context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
  138. os.environ["RANK_TABLE_FILE"] = "invalid_file.json"
  139. os.environ["RANK_ID"] = "4"
  140. run_e2e_dump()
  141. del os.environ['RANK_TABLE_FILE']
  142. del os.environ['RANK_ID']
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_e2e_dump():
    """
    Feature: e2e dump on CPU
    Description: run the shared e2e dump check with device_target CPU
    Expectation: dump files are generated and contain the expected output tensor
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    run_e2e_dump()
  150. @pytest.mark.level0
  151. @pytest.mark.platform_x86_cpu
  152. @pytest.mark.env_onecard
  153. @security_off_wrap
  154. def test_cpu_e2e_dump_with_hccl_set():
  155. context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
  156. os.environ["RANK_TABLE_FILE"] = "invalid_file.json"
  157. os.environ["RANK_ID"] = "4"
  158. run_e2e_dump()
  159. del os.environ['RANK_TABLE_FILE']
  160. del os.environ['RANK_ID']
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_dump():
    """
    Feature: e2e dump on GPU
    Description: run the shared e2e dump check with device_target GPU
    Expectation: dump files are generated and contain the expected output tensor
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    run_e2e_dump()
  168. @pytest.mark.level0
  169. @pytest.mark.platform_x86_gpu_training
  170. @pytest.mark.env_onecard
  171. @security_off_wrap
  172. def test_gpu_e2e_dump_with_hccl_set():
  173. context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
  174. os.environ["RANK_TABLE_FILE"] = "invalid_file.json"
  175. os.environ["RANK_ID"] = "4"
  176. run_e2e_dump()
  177. del os.environ['RANK_TABLE_FILE']
  178. del os.environ['RANK_ID']
  179. class ReluReduceMeanDenseRelu(Cell):
  180. def __init__(self, kernel, bias, in_channel, num_class):
  181. super().__init__()
  182. self.relu = P.ReLU()
  183. self.mean = P.ReduceMean(keep_dims=False)
  184. self.dense = Dense(in_channel, num_class, kernel, bias)
  185. def construct(self, x_):
  186. x_ = self.relu(x_)
  187. x_ = self.mean(x_, (2, 3))
  188. x_ = self.dense(x_)
  189. x_ = self.relu(x_)
  190. return x_
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_net_multi_layer_mode1():
    """Train one step of a multi-layer net with async dump enabled and, when the
    Ascend conversion tool (msaccucmp) is installed, convert the dumped loss
    tensor to npy and compare it against the in-memory loss value."""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'async_dump_net_multi_layer_mode1')
        json_file_path = os.path.join(tmp_dir, "test_async_dump_net_multi_layer_mode1.json")
        generate_dump_json(dump_path, json_file_path, 'test_async_dump_net_multi_layer_mode1')
        os.environ['MINDSPORE_DUMP_CONFIG'] = json_file_path
        weight = Tensor(np.ones((1000, 2048)).astype(np.float32))
        bias = Tensor(np.ones((1000,)).astype(np.float32))
        net = ReluReduceMeanDenseRelu(weight, bias, 2048, 1000)
        criterion = SoftmaxCrossEntropyWithLogits(sparse=False)
        # NOTE(review): the lambda parameter 'x' shadows the module-level input tensor 'x'.
        optimizer = Momentum(learning_rate=0.1, momentum=0.1,
                             params=filter(lambda x: x.requires_grad, net.get_parameters()))
        net_with_criterion = WithLossCell(net, criterion)
        train_network = TrainOneStepCell(net_with_criterion, optimizer)
        train_network.set_train()
        inputs = Tensor(np.random.randn(32, 2048, 7, 7).astype(np.float32))
        label = Tensor(np.zeros(shape=(32, 1000)).astype(np.float32))
        net_dict = train_network(inputs, label)
        # 'test' is the graph name this dump config produces — unlike the 'Net'
        # directory used by the simpler tests above.
        dump_file_path = os.path.join(dump_path, 'rank_0', 'test', '0', '0')
        dump_file_name = list(Path(dump_file_path).rglob("*SoftmaxCrossEntropyWithLogits*"))[0]
        dump_file_full_path = os.path.join(dump_file_path, dump_file_name)
        # Fresh scratch directory for the converted npy files.
        npy_path = os.path.join(dump_path, "npy_files")
        if os.path.exists(npy_path):
            shutil.rmtree(npy_path)
        os.mkdir(npy_path)
        # The offline converter is only usable when the Ascend toolkit ships msaccucmp.
        tool_path_search_list = list(Path('/usr/local/Ascend').rglob('msaccucmp.py*'))
        if tool_path_search_list:
            converter = import_module("mindspore.offline_debug.convert_async")
            converter.AsyncDumpConverter([dump_file_full_path], npy_path).convert_files()
            npy_result_file = list(Path(npy_path).rglob("*output.0.*.npy"))[0]
            dump_result = np.load(os.path.join(npy_path, npy_result_file))
            # Compare each element of the training result with the converted dump.
            for index, value in enumerate(net_dict):
                assert value.asnumpy() == dump_result[index]
        else:
            print('Failed to find hisi convert tools: msaccucmp.py or msaccucmp.pyc.')
        del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_dump_with_diagnostic_path():
    """
    Test e2e dump when path is not set (set to empty) in dump json file and MS_DIAGNOSTIC_DATA_PATH is set.
    Data is expected to be dumped into MS_DIAGNOSTIC_DATA_PATH/debug_dump.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json')
        # Empty dump path in the config triggers the MS_DIAGNOSTIC_DATA_PATH fallback.
        generate_dump_json('', dump_config_path, 'test_e2e_dump')
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        diagnose_path = os.path.join(tmp_dir, 'e2e_dump')
        os.environ['MS_DIAGNOSTIC_DATA_PATH'] = diagnose_path
        # Dump data should land under <diagnostic path>/debug_dump.
        dump_file_path = os.path.join(diagnose_path, 'debug_dump', 'rank_0', 'Net', '0', '0')
        if os.path.isdir(diagnose_path):
            shutil.rmtree(diagnose_path)
        add = Net()
        add(Tensor(x), Tensor(y))
        # Same per-step file count as the Ascend branch of run_e2e_dump.
        assert len(os.listdir(dump_file_path)) == 5
        del os.environ['MINDSPORE_DUMP_CONFIG']
        del os.environ['MS_DIAGNOSTIC_DATA_PATH']
def run_e2e_dump_execution_graph():
    """Run e2e dump and check execution order files are produced.

    Uses the device_target configured by the caller; returns silently off Linux.
    """
    if sys.platform != 'linux':
        return
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'e2e_dump_exe_graph')
        dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json')
        generate_dump_json(dump_path, dump_config_path, 'test_e2e_dump')
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        if os.path.isdir(dump_path):
            shutil.rmtree(dump_path)
        add = Net()
        add(Tensor(x), Tensor(y))
        # The dump writes execution-order records under rank_0/execution_order;
        # two entries are expected for this single-graph run.
        exe_graph_path = os.path.join(dump_path, 'rank_0', 'execution_order')
        assert len(os.listdir(exe_graph_path)) == 2
        del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_dump_with_execution_graph():
    """Test dump with execution graph on GPU."""
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    run_e2e_dump_execution_graph()
def run_overflow_dump():
    """Run async dump and generate overflow.

    Adds two float16 tensors whose sum exceeds the float16 maximum, then checks
    that the overflow dump produced exactly the operator file plus the Opdebug
    file, with matching task/stream ids in the file names and file contents.
    """
    if sys.platform != 'linux':
        return
    # 60000 + 60000 overflows float16 (max ~65504), forcing an overflow dump.
    overflow_x = np.array([60000, 60000]).astype(np.float16)
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'overflow_dump')
        dump_config_path = os.path.join(tmp_dir, 'overflow_dump.json')
        generate_dump_json_with_overflow(dump_path, dump_config_path, 'test_async_dump', 3)
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        if os.path.isdir(dump_path):
            shutil.rmtree(dump_path)
        add = Net()
        add(Tensor(overflow_x), Tensor(overflow_x))
        exe_graph_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
        # Dumping is asynchronous: poll up to 5 times, 2 s apart.
        for _ in range(5):
            if not os.path.exists(exe_graph_path):
                time.sleep(2)
        check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
        # check if overflow dump generate exact two files, and the naming format
        assert len(os.listdir(exe_graph_path)) == 2
        output_path = glob.glob(os.path.join(exe_graph_path, "Add.Default_Add-op0.*.*.*"))[0]
        overflow_path = glob.glob(os.path.join(exe_graph_path, "Opdebug.Node_OpDebug.*.*.*"))[0]
        assert output_path
        assert overflow_path
        # check if generated files have matching task and stream id
        # File name layout (dot-separated): <op>.<name>.<task_id>.<stream_id>.<timestamp>,
        # so the task id sits between the 2nd and 3rd dots, the stream id
        # between the 3rd and 4th.
        output_file_name = os.path.split(output_path)
        overflow_file_name = os.path.split(overflow_path)
        output_second_dot_pos = find_nth_pos(output_file_name[1], ".", 2)
        output_third_dot_pos = find_nth_pos(output_file_name[1], ".", 3)
        output_fourth_dot_pos = find_nth_pos(output_file_name[1], ".", 4)
        output_task_id = output_file_name[1][output_second_dot_pos+1:output_third_dot_pos]
        output_stream_id = output_file_name[1][output_third_dot_pos+1:output_fourth_dot_pos]
        overflow_second_dot_pos = find_nth_pos(overflow_file_name[1], ".", 2)
        overflow_third_dot_pos = find_nth_pos(overflow_file_name[1], ".", 3)
        overflow_fourth_dot_pos = find_nth_pos(overflow_file_name[1], ".", 4)
        overflow_task_id = overflow_file_name[1][overflow_second_dot_pos+1:overflow_third_dot_pos]
        overflow_stream_id = overflow_file_name[1][overflow_third_dot_pos+1:overflow_fourth_dot_pos]
        assert output_task_id == overflow_task_id
        assert output_stream_id == overflow_stream_id
        # check if overflow dump file contains same task and stream id as file name
        with open(overflow_path, 'rb') as f:
            # NOTE(review): offsets look like the Opdebug binary record layout —
            # header skipped at byte 321, task id at record bytes 24:25, stream
            # id at 16:17, little-endian. Confirm against the Ascend dump format.
            f.seek(321, 0)
            raw_data = f.read()
            task_id_infile = int.from_bytes(raw_data[24:25], 'little')
            stream_id_infile = int.from_bytes(raw_data[16:17], 'little')
            assert output_task_id == str(task_id_infile)
            assert output_stream_id == str(stream_id_infile)
        del os.environ['MINDSPORE_DUMP_CONFIG']
def run_not_overflow_dump():
    """Run async dump and not generate overflow.

    Adds two float16 tensors whose sum stays in range, so no overflow dump
    directory should be created at all.
    """
    if sys.platform != 'linux':
        return
    # 60000 + 2 stays within the float16 range — no overflow occurs.
    overflow_x = np.array([60000, 60000]).astype(np.float16)
    overflow_y = np.array([2, 2]).astype(np.float16)
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'overflow_dump')
        dump_config_path = os.path.join(tmp_dir, 'overflow_dump.json')
        generate_dump_json_with_overflow(dump_path, dump_config_path, 'test_async_dump', 3)
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        if os.path.isdir(dump_path):
            shutil.rmtree(dump_path)
        add = Net()
        add(Tensor(overflow_x), Tensor(overflow_y))
        exe_graph_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
        # check no overflow is happening, and path should not be generated
        assert not os.path.exists(exe_graph_path)
        del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_overflow_dump():
    """
    Feature: Overflow Dump
    Description: Test overflow dump
    Expectation: Overflow is occurred, and overflow dump file is in correct format
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='Ascend')
    run_overflow_dump()
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_not_overflow_dump():
    """
    Feature: Overflow Dump
    Description: Test overflow dump
    Expectation: Overflow is not occurred, and overflow dump file is not generated
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='Ascend')
    run_not_overflow_dump()
  376. def check_statistic_dump(dump_file_path):
  377. output_name = "statistic.csv"
  378. output_path = glob.glob(os.path.join(dump_file_path, output_name))[0]
  379. real_path = os.path.realpath(output_path)
  380. with open(real_path) as f:
  381. reader = csv.DictReader(f)
  382. input1 = next(reader)
  383. assert input1['IO'] == 'input'
  384. assert input1['Min Value'] == '1'
  385. assert input1['Max Value'] == '6'
  386. input2 = next(reader)
  387. assert input2['IO'] == 'input'
  388. assert input2['Min Value'] == '7'
  389. assert input2['Max Value'] == '12'
  390. output = next(reader)
  391. assert output['IO'] == 'output'
  392. assert output['Min Value'] == '8'
  393. assert output['Max Value'] == '18'
  394. def check_data_dump(dump_file_path):
  395. output_name = "Add.Add-op*.0.0.*.output.0.DefaultFormat.npy"
  396. output_path = glob.glob(os.path.join(dump_file_path, output_name))[0]
  397. real_path = os.path.realpath(output_path)
  398. output = np.load(real_path)
  399. expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
  400. assert np.array_equal(output, expect)
def run_gpu_e2e_dump(saved_data):
    """Run gpu e2e dump.

    Args:
        saved_data (str): 'statistic', 'tensor', or 'full' — which data the
            generated dump config asks MindSpore to save.
    """
    if sys.platform != 'linux':
        return
    # Unlike the other runners, this one creates its temp dir under the CWD.
    pwd = os.getcwd()
    with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'gpu_e2e_dump')
        dump_config_path = os.path.join(tmp_dir, 'gpu_e2e_dump.json')
        generate_statistic_dump_json(dump_path, dump_config_path, 'test_gpu_e2e_dump', saved_data)
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
        if os.path.isdir(dump_path):
            shutil.rmtree(dump_path)
        add = Net()
        add(Tensor(x), Tensor(y))
        # Dumping may lag the computation: poll up to 3 times, 2 s apart.
        for _ in range(3):
            if not os.path.exists(dump_file_path):
                time.sleep(2)
        check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
        if saved_data in ('statistic', 'full'):
            check_statistic_dump(dump_file_path)
        if saved_data in ('tensor', 'full'):
            check_data_dump(dump_file_path)
        if saved_data == 'statistic':
            # assert only file is statistic.csv, tensor data is not saved
            assert len(os.listdir(dump_file_path)) == 1
        elif saved_data == 'tensor':
            # assert only tensor data is saved, not statistics
            stat_path = os.path.join(dump_file_path, 'statistic.csv')
            assert not os.path.isfile(stat_path)
        del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_statistic_dump():
    """
    Feature: GPU Statistics Dump
    Description: Test GPU statistics dump
    Expectation: Statistics are stored in statistic.csv files
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    run_gpu_e2e_dump('statistic')
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_tensor_dump():
    """
    Feature: GPU Tensor Dump
    Description: Test GPU tensor dump
    Expectation: Tensor data are stored in npy files
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    run_gpu_e2e_dump('tensor')
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_full_dump():
    """
    Feature: GPU Full Dump
    Description: Test GPU full dump
    Expectation: Tensor are stored in npy files and their statistics stored in statistic.csv
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    run_gpu_e2e_dump('full')