You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_cell_dump.py 8.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. import os
  16. import sys
  17. import tempfile
  18. import time
  19. import shutil
  20. import glob
  21. from enum import Enum
  22. import numpy as np
  23. import pytest
  24. from mindspore import Tensor, set_dump
  25. from mindspore.ops import operations as P
  26. from mindspore.nn import Cell
  27. from mindspore.nn import Dense
  28. from mindspore.nn import SoftmaxCrossEntropyWithLogits
  29. from mindspore.nn import Momentum
  30. from mindspore.nn import TrainOneStepCell
  31. from mindspore.nn import WithLossCell
  32. from dump_test_utils import generate_cell_dump_json, check_dump_structure
  33. from tests.security_utils import security_off_wrap
  34. class IsDump(Enum):
  35. SET_DUMP_TRUE = 1
  36. SET_DUMP_FALSE = 2
  37. SET_NONE = 3
  38. class ReluReduceMeanDenseRelu(Cell):
  39. def __init__(self, kernel, bias, in_channel, num_class):
  40. super().__init__()
  41. self.relu = P.ReLU()
  42. self.mean = P.ReduceMean(keep_dims=False)
  43. self.dense = Dense(in_channel, num_class, kernel, bias)
  44. def construct(self, x_):
  45. x_ = self.relu(x_)
  46. x_ = self.mean(x_, (2, 3))
  47. x_ = self.dense(x_)
  48. x_ = self.relu(x_)
  49. return x_
  50. def run_multi_layer_train(is_set_dump):
  51. weight = Tensor(np.ones((1000, 2048)).astype(np.float32))
  52. bias = Tensor(np.ones((1000,)).astype(np.float32))
  53. net = ReluReduceMeanDenseRelu(weight, bias, 2048, 1000)
  54. if is_set_dump is IsDump.SET_DUMP_TRUE:
  55. set_dump(net.relu)
  56. elif is_set_dump is IsDump.SET_DUMP_FALSE:
  57. set_dump(net.relu, enabled=False)
  58. set_dump(net.mean)
  59. criterion = SoftmaxCrossEntropyWithLogits(sparse=False)
  60. optimizer = Momentum(learning_rate=0.1, momentum=0.1,
  61. params=filter(lambda x: x.requires_grad, net.get_parameters()))
  62. net_with_criterion = WithLossCell(net, criterion)
  63. train_network = TrainOneStepCell(net_with_criterion, optimizer)
  64. train_network.set_train()
  65. inputs = Tensor(np.random.randn(32, 2048, 7, 7).astype(np.float32))
  66. label = Tensor(np.zeros(shape=(32, 1000)).astype(np.float32))
  67. train_network(inputs, label)
  68. @pytest.mark.level0
  69. @pytest.mark.platform_arm_ascend_training
  70. @pytest.mark.platform_x86_ascend_training
  71. @pytest.mark.env_onecard
  72. @security_off_wrap
  73. def test_ascend_cell_dump():
  74. """
  75. Feature: Cell Dump
  76. Description: Test cell dump
  77. Expectation: Only dump cell set by set_dump when dump_mode = 2
  78. """
  79. if sys.platform != 'linux':
  80. return
  81. with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
  82. dump_path = os.path.join(tmp_dir, 'cell_dump')
  83. dump_config_path = os.path.join(tmp_dir, 'cell_dump.json')
  84. generate_cell_dump_json(dump_path, dump_config_path, 'test_async_dump', 2)
  85. os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
  86. if os.path.isdir(dump_path):
  87. shutil.rmtree(dump_path)
  88. run_multi_layer_train(IsDump.SET_DUMP_TRUE)
  89. dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
  90. for _ in range(5):
  91. if not os.path.exists(dump_file_path):
  92. time.sleep(2)
  93. check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
  94. # make sure 2 relu dump files are generated with correct name prefix
  95. assert len(os.listdir(dump_file_path)) == 2
  96. relu_file_name = "ReLU.Default_network-WithLossCell__backbone-ReluReduceMeanDenseRelu_ReLU-op*.*.*.*"
  97. relu_file1 = glob.glob(os.path.join(dump_file_path, relu_file_name))[0]
  98. relu_file2 = glob.glob(os.path.join(dump_file_path, relu_file_name))[1]
  99. assert relu_file1
  100. assert relu_file2
  101. del os.environ['MINDSPORE_DUMP_CONFIG']
  102. @pytest.mark.level0
  103. @pytest.mark.platform_arm_ascend_training
  104. @pytest.mark.platform_x86_ascend_training
  105. @pytest.mark.env_onecard
  106. @security_off_wrap
  107. def test_ascend_not_cell_dump():
  108. """
  109. Feature: Cell Dump
  110. Description: Test cell dump
  111. Expectation: Should ignore set_dump when dump_mode != 2
  112. """
  113. if sys.platform != 'linux':
  114. return
  115. with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
  116. dump_path = os.path.join(tmp_dir, 'cell_dump')
  117. dump_config_path = os.path.join(tmp_dir, 'cell_dump.json')
  118. generate_cell_dump_json(dump_path, dump_config_path, 'test_async_dump', 0)
  119. os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
  120. if os.path.isdir(dump_path):
  121. shutil.rmtree(dump_path)
  122. run_multi_layer_train(IsDump.SET_DUMP_TRUE)
  123. dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
  124. for _ in range(5):
  125. if not os.path.exists(dump_file_path):
  126. time.sleep(2)
  127. check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
  128. # make sure set_dump is ignored and all cell layer are dumped
  129. assert len(os.listdir(dump_file_path)) == 10
  130. del os.environ['MINDSPORE_DUMP_CONFIG']
  131. @pytest.mark.level0
  132. @pytest.mark.platform_arm_ascend_training
  133. @pytest.mark.platform_x86_ascend_training
  134. @pytest.mark.env_onecard
  135. @security_off_wrap
  136. def test_ascend_cell_empty_dump():
  137. """
  138. Feature: Cell Dump
  139. Description: Test cell dump
  140. Expectation: Should dump nothing when set_dump is not set and dump_mode = 2
  141. """
  142. if sys.platform != 'linux':
  143. return
  144. with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
  145. dump_path = os.path.join(tmp_dir, 'cell_dump')
  146. dump_config_path = os.path.join(tmp_dir, 'cell_dump.json')
  147. generate_cell_dump_json(dump_path, dump_config_path, 'test_async_dump', 2)
  148. os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
  149. if os.path.isdir(dump_path):
  150. shutil.rmtree(dump_path)
  151. run_multi_layer_train(IsDump.SET_NONE)
  152. dump_file_path = os.path.join(dump_path, 'rank_0', 'Net')
  153. time.sleep(5)
  154. # make sure no files are dumped
  155. assert not os.path.exists(dump_file_path)
  156. del os.environ['MINDSPORE_DUMP_CONFIG']
  157. @pytest.mark.level0
  158. @pytest.mark.platform_arm_ascend_training
  159. @pytest.mark.platform_x86_ascend_training
  160. @pytest.mark.env_onecard
  161. @security_off_wrap
  162. def test_ascend_cell_dump_set_enable_false():
  163. """
  164. Feature: Cell Dump
  165. Description: Test cell dump
  166. Expectation: Should ignore set_dump when enabled=False
  167. """
  168. if sys.platform != 'linux':
  169. return
  170. with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
  171. dump_path = os.path.join(tmp_dir, 'cell_dump')
  172. dump_config_path = os.path.join(tmp_dir, 'cell_dump.json')
  173. generate_cell_dump_json(dump_path, dump_config_path, 'test_async_dump', 2)
  174. os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
  175. if os.path.isdir(dump_path):
  176. shutil.rmtree(dump_path)
  177. run_multi_layer_train(IsDump.SET_DUMP_FALSE)
  178. dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
  179. for _ in range(5):
  180. if not os.path.exists(dump_file_path):
  181. time.sleep(1)
  182. check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
  183. # make sure directory has dumped files with enabled=True
  184. assert len(os.listdir(dump_file_path)) == 1
  185. mean_file_name = "ReduceMean.Default_network-WithLossCell__backbone-ReluReduceMeanDenseRelu_ReduceMean-*.*.*.*"
  186. mean_file = glob.glob(os.path.join(dump_file_path, mean_file_name))[0]
  187. assert mean_file
  188. del os.environ['MINDSPORE_DUMP_CONFIG']