You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_multi_root_graph_dump.py 5.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. import os
  16. from os import path
  17. import tempfile
  18. import time
  19. import shutil
  20. import csv
  21. import numpy as np
  22. import pytest
  23. import mindspore.context as context
  24. from mindspore import Tensor
  25. from mindspore.ops import operations as P
  26. from mindspore.nn import Cell
  27. from dump_test_utils import generate_dump_json
  28. from tests.security_utils import security_off_wrap
  29. class AddNet(Cell):
  30. def __init__(self):
  31. super(AddNet, self).__init__()
  32. self.add = P.TensorAdd()
  33. def construct(self, input_x, input_y):
  34. output_z = self.add(input_x, input_y)
  35. return output_z
  36. class NewAddNet(Cell):
  37. def __init__(self):
  38. super(NewAddNet, self).__init__()
  39. self.add = P.AddN()
  40. def construct(self, x, y):
  41. z = self.add([x, y, y])
  42. return z
  43. def train_addnet(epoch):
  44. net = AddNet()
  45. net2 = NewAddNet()
  46. output_list = []
  47. input_x = Tensor(np.ones([2, 1, 2, 1]).astype(np.float32))
  48. input_y = Tensor(np.ones([2, 1, 2, 1]).astype(np.float32))
  49. for _ in range(epoch):
  50. out_put = net(input_x, input_y)
  51. out2 = net2(out_put, input_x)
  52. output_list.append(out2.asnumpy())
  53. input_x = input_x + input_y
  54. def run_multi_root_graph_dump(device, dump_mode, test_name):
  55. """Run dump for multi root graph script."""
  56. context.set_context(mode=context.GRAPH_MODE, device_target=device)
  57. with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
  58. dump_path = os.path.join(tmp_dir, dump_mode)
  59. dump_config_path = os.path.join(tmp_dir, dump_mode + ".json")
  60. generate_dump_json(dump_path, dump_config_path, test_name)
  61. os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
  62. dump_file_path = os.path.join(dump_path, 'rank_0', 'Net')
  63. if os.path.isdir(dump_path):
  64. shutil.rmtree(dump_path)
  65. epoch = 3
  66. train_addnet(epoch)
  67. for _ in range(3):
  68. if not os.path.exists(dump_file_path):
  69. time.sleep(2)
  70. # Multi root graph script : we have 2 graphs under rank_0 dir
  71. # Each graph should have 3 iteration
  72. # Each graph was executed once per epoch,
  73. # Graph 0 was executed in even iterations, graph one was executed in odd iterations
  74. assert len(os.listdir(dump_file_path)) == 2
  75. dump_path_graph_0 = os.path.join(dump_file_path, '0')
  76. dump_path_graph_1 = os.path.join(dump_file_path, '1')
  77. assert sorted(os.listdir(dump_path_graph_0)) == ['0', '2', '4']
  78. assert sorted(os.listdir(dump_path_graph_1)) == ['1', '3', '5']
  79. execution_order_path = os.path.join(dump_path, 'rank_0', 'execution_order')
  80. # Four files in execution_order dir.
  81. # Two files for each graph (ms_execution_order and ms_global_execution_order)
  82. assert len(os.listdir(execution_order_path)) == 4
  83. global_exec_order_graph_0 = os.path.join(execution_order_path, 'ms_global_execution_order_graph_0.csv')
  84. assert path.exists(global_exec_order_graph_0)
  85. with open(global_exec_order_graph_0) as csvfile:
  86. history_graph_0 = csv.reader(csvfile)
  87. iter_list_graph_0 = list(history_graph_0)
  88. assert iter_list_graph_0 == [['0'], ['2'], ['4']]
  89. global_exec_order_graph_1 = os.path.join(execution_order_path, 'ms_global_execution_order_graph_1.csv')
  90. assert path.exists(global_exec_order_graph_1)
  91. with open(global_exec_order_graph_1) as csvfile:
  92. history_graph_1 = csv.reader(csvfile)
  93. iter_list_graph_1 = list(history_graph_1)
  94. assert iter_list_graph_1 == [['1'], ['3'], ['5']]
  95. @pytest.mark.level0
  96. @pytest.mark.platform_x86_gpu_training
  97. @pytest.mark.env_onecard
  98. @security_off_wrap
  99. def test_GPU_e2e_multi_root_graph_dump():
  100. """
  101. Feature:
  102. Multi root graph e2e dump for GPU.
  103. Description:
  104. Test multi root graph e2e dump GPU.
  105. Expectation:
  106. Dump for two different graphs, graph 0 even iterations and graph 1 odd iterations.
  107. """
  108. run_multi_root_graph_dump("GPU", "e2e_dump", "test_GPU_e2e_multi_root_graph_dump")
  109. @pytest.mark.level0
  110. @pytest.mark.platform_arm_ascend_training
  111. @pytest.mark.platform_x86_ascend_training
  112. @pytest.mark.env_onecard
  113. @security_off_wrap
  114. def test_Ascend_e2e_multi_root_graph_dump():
  115. """
  116. Feature:
  117. Multi root graph e2e dump for Ascend.
  118. Description:
  119. Test multi root graph e2e dump Ascend.
  120. Expectation:
  121. Dump for two different graphs, graph 0 even iterations and graph 1 odd iterations.
  122. """
  123. run_multi_root_graph_dump("Ascend", "e2e_dump", "test_Ascend_e2e_multi_root_graph_dump")
  124. @pytest.mark.level0
  125. @pytest.mark.platform_arm_ascend_training
  126. @pytest.mark.platform_x86_ascend_training
  127. @pytest.mark.env_onecard
  128. @security_off_wrap
  129. def test_Ascend_async_multi_root_graph_dump():
  130. """
  131. Feature:
  132. Multi root graph async dump for Ascend.
  133. Description:
  134. Test multi root graph async dump Ascend.
  135. Expectation:
  136. Dump for two different graphs, graph 0 even iterations and graph 1 odd iterations.
  137. """
  138. run_multi_root_graph_dump("Ascend", "async_dump", "test_Ascend_async_multi_root_graph_dump")