You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_profiling_startstop.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Test MindData Profiling Start and Stop Support
  17. """
  18. import json
  19. import os
  20. import numpy as np
  21. import pytest
  22. import mindspore.common.dtype as mstype
  23. import mindspore.dataset as ds
  24. import mindspore._c_dataengine as cde
  25. import mindspore.dataset.transforms.c_transforms as C
  26. FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
  27. DATASET_ROOT = "../data/dataset/testTFTestAllTypes/"
  28. SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
  29. # Add file name to rank id mapping so that each profiling file name is unique,
  30. # to support parallel test execution
  31. file_name_map_rank_id = {"test_profiling_early_stop": "0",
  32. "test_profiling_delayed_start": "1",
  33. "test_profiling_start_start": "2",
  34. "test_profiling_multiple_start_stop": "3",
  35. "test_profiling_stop_stop": "4",
  36. "test_profiling_stop_nostart": "5"}
  37. @pytest.mark.forked
  38. class TestMindDataProfilingStartStop:
  39. """
  40. Test MindData Profiling Manager Start-Stop Support
  41. """
  42. def setup_class(self):
  43. """
  44. Run once for the class
  45. """
  46. self._pipeline_file = "./pipeline_profiling"
  47. self._cpu_util_file = "./minddata_cpu_utilization"
  48. self._dataset_iterator_file = "./dataset_iterator_profiling"
  49. def setup_method(self):
  50. """
  51. Run before each test function.
  52. """
  53. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  54. file_id = file_name_map_rank_id[file_name]
  55. self.pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  56. self.cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  57. self.dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
  58. # Confirm MindData Profiling files do not yet exist
  59. assert os.path.exists(self.pipeline_file) is False
  60. assert os.path.exists(self.cpu_util_file) is False
  61. assert os.path.exists(self.dataset_iterator_file) is False
  62. # Set the MindData Profiling related environment variables
  63. os.environ['RANK_ID'] = file_id
  64. os.environ['DEVICE_ID'] = file_id
  65. def teardown_method(self):
  66. """
  67. Run after each test function.
  68. """
  69. # Delete MindData profiling files generated from the test.
  70. if os.path.exists(self.pipeline_file):
  71. os.remove(self.pipeline_file)
  72. if os.path.exists(self.cpu_util_file):
  73. os.remove(self.cpu_util_file)
  74. if os.path.exists(self.dataset_iterator_file):
  75. os.remove(self.dataset_iterator_file)
  76. # Disable MindData Profiling related environment variables
  77. del os.environ['RANK_ID']
  78. del os.environ['DEVICE_ID']
  79. def confirm_pipeline_file(self, num_ops, op_list=None):
  80. """
  81. Confirm pipeline JSON file with <num_ops> in the pipeline and the given optional list of ops
  82. """
  83. with open(self.pipeline_file) as file1:
  84. data = json.load(file1)
  85. op_info = data["op_info"]
  86. # Confirm ops in pipeline file
  87. assert len(op_info) == num_ops
  88. if op_list:
  89. for i in range(num_ops):
  90. assert op_info[i]["op_type"] in op_list
  91. def confirm_cpuutil_file(self, num_pipeline_ops):
  92. """
  93. Confirm CPU utilization JSON file with <num_pipeline_ops> in the pipeline
  94. """
  95. with open(self.cpu_util_file) as file1:
  96. data = json.load(file1)
  97. op_info = data["op_info"]
  98. assert len(op_info) == num_pipeline_ops
  99. def confirm_dataset_iterator_file(self, num_batches):
  100. """
  101. Confirm dataset iterator file exists with the correct number of rows in the file
  102. """
  103. assert os.path.exists(self.dataset_iterator_file)
  104. actual_num_lines = sum(1 for _ in open(self.dataset_iterator_file))
  105. # Confirm there are 4 lines for each batch in the dataset iterator file
  106. assert actual_num_lines == 4 * num_batches
  107. def test_profiling_early_stop(self):
  108. """
  109. Test MindData Profiling with Early Stop; profile for some iterations and then stop profiling
  110. """
  111. def source1():
  112. for i in range(8000):
  113. yield (np.array([i]),)
  114. # Get instance pointer for MindData profiling manager
  115. md_profiler = cde.GlobalContext.profiling_manager()
  116. # Initialize MindData profiling manager
  117. md_profiler.init()
  118. # Start MindData Profiling
  119. md_profiler.start()
  120. # Create this basic and common pipeline
  121. # Leaf/Source-Op -> Map -> Batch
  122. data1 = ds.GeneratorDataset(source1, ["col1"])
  123. type_cast_op = C.TypeCast(mstype.int32)
  124. data1 = data1.map(operations=type_cast_op, input_columns="col1")
  125. data1 = data1.batch(16)
  126. num_iter = 0
  127. # Note: If create_dict_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
  128. for _ in data1.create_dict_iterator(num_epochs=2):
  129. if num_iter == 400:
  130. # Stop MindData Profiling and Save MindData Profiling Output
  131. md_profiler.stop()
  132. md_profiler.save(os.getcwd())
  133. num_iter += 1
  134. assert num_iter == 500
  135. # Confirm the content of the profiling files, including 4 ops in the pipeline JSON file
  136. self.confirm_pipeline_file(4, ["GeneratorOp", "BatchOp", "MapOp", "EpochCtrlOp"])
  137. self.confirm_cpuutil_file(4)
  138. self.confirm_dataset_iterator_file(401)
  139. def test_profiling_delayed_start(self):
  140. """
  141. Test MindData Profiling with Delayed Start; profile for subset of iterations
  142. """
  143. def source1():
  144. for i in range(8000):
  145. yield (np.array([i]),)
  146. # Get instance pointer for MindData profiling manager
  147. md_profiler = cde.GlobalContext.profiling_manager()
  148. # Initialize MindData profiling manager
  149. md_profiler.init()
  150. # Create this basic and common pipeline
  151. # Leaf/Source-Op -> Map -> Batch
  152. data1 = ds.GeneratorDataset(source1, ["col1"])
  153. type_cast_op = C.TypeCast(mstype.int32)
  154. data1 = data1.map(operations=type_cast_op, input_columns="col1")
  155. data1 = data1.batch(16)
  156. num_iter = 0
  157. # Note: If create_dict_iterator() is called with num_epochs=1, then EpochCtrlOp is not added to the pipeline
  158. for _ in data1.create_dict_iterator(num_epochs=1):
  159. if num_iter == 5:
  160. # Start MindData Profiling
  161. md_profiler.start()
  162. elif num_iter == 400:
  163. # Stop MindData Profiling and Save MindData Profiling Output
  164. md_profiler.stop()
  165. md_profiler.save(os.getcwd())
  166. num_iter += 1
  167. assert num_iter == 500
  168. # Confirm the content of the profiling files, including 3 ops in the pipeline JSON file
  169. self.confirm_pipeline_file(3, ["GeneratorOp", "BatchOp", "MapOp"])
  170. self.confirm_cpuutil_file(3)
  171. self.confirm_dataset_iterator_file(395)
  172. def test_profiling_multiple_start_stop(self):
  173. """
  174. Test MindData Profiling with Delayed Start and Multiple Start-Stop Sequences
  175. """
  176. def source1():
  177. for i in range(8000):
  178. yield (np.array([i]),)
  179. # Get instance pointer for MindData profiling manager
  180. md_profiler = cde.GlobalContext.profiling_manager()
  181. # Initialize MindData profiling manager
  182. md_profiler.init()
  183. # Create this basic and common pipeline
  184. # Leaf/Source-Op -> Map -> Batch
  185. data1 = ds.GeneratorDataset(source1, ["col1"])
  186. type_cast_op = C.TypeCast(mstype.int32)
  187. data1 = data1.map(operations=type_cast_op, input_columns="col1")
  188. data1 = data1.batch(16)
  189. num_iter = 0
  190. # Note: If create_dict_iterator() is called with num_epochs=1, then EpochCtrlOp is not added to the pipeline
  191. for _ in data1.create_dict_iterator(num_epochs=1):
  192. if num_iter == 5:
  193. # Start MindData Profiling
  194. md_profiler.start()
  195. elif num_iter == 40:
  196. # Stop MindData Profiling
  197. md_profiler.stop()
  198. if num_iter == 200:
  199. # Start MindData Profiling
  200. md_profiler.start()
  201. elif num_iter == 400:
  202. # Stop MindData Profiling
  203. md_profiler.stop()
  204. num_iter += 1
  205. # Save MindData Profiling Output
  206. md_profiler.save(os.getcwd())
  207. assert num_iter == 500
  208. # Confirm the content of the profiling files, including 3 ops in the pipeline JSON file
  209. self.confirm_pipeline_file(3, ["GeneratorOp", "BatchOp", "MapOp"])
  210. self.confirm_cpuutil_file(3)
  211. # Note: The dataset iterator file should only contain data for batches 200 to 400
  212. self.confirm_dataset_iterator_file(200)
  213. def test_profiling_start_start(self):
  214. """
  215. Test MindData Profiling with Start followed by Start - user error scenario
  216. """
  217. # Get instance pointer for MindData profiling manager
  218. md_profiler = cde.GlobalContext.profiling_manager()
  219. # Initialize MindData profiling manager
  220. md_profiler.init()
  221. # Start MindData Profiling
  222. md_profiler.start()
  223. with pytest.raises(RuntimeError) as info:
  224. # Reissue Start MindData Profiling
  225. md_profiler.start()
  226. assert "MD ProfilingManager is already running." in str(info)
  227. # Stop MindData Profiling
  228. md_profiler.stop()
  229. def test_profiling_stop_stop(self):
  230. """
  231. Test MindData Profiling with Stop followed by Stop - user warning scenario
  232. """
  233. # Get instance pointer for MindData profiling manager
  234. md_profiler = cde.GlobalContext.profiling_manager()
  235. # Initialize MindData profiling manager
  236. md_profiler.init()
  237. # Start MindData Profiling
  238. md_profiler.start()
  239. # Stop MindData Profiling and Save MindData Profiling Output
  240. md_profiler.stop()
  241. md_profiler.save(os.getcwd())
  242. # Reissue Stop MindData Profiling
  243. # A warning "MD ProfilingManager had already stopped" is produced.
  244. md_profiler.stop()
  245. def test_profiling_stop_nostart(self):
  246. """
  247. Test MindData Profiling with Stop not without prior Start - user error scenario
  248. """
  249. # Get instance pointer for MindData profiling manager
  250. md_profiler = cde.GlobalContext.profiling_manager()
  251. # Initialize MindData profiling manager
  252. md_profiler.init()
  253. with pytest.raises(RuntimeError) as info:
  254. # Stop MindData Profiling - without prior Start()
  255. md_profiler.stop()
  256. assert "MD ProfilingManager has not started yet." in str(info)
  257. # Start MindData Profiling
  258. md_profiler.start()
  259. # Stop MindData Profiling - to return profiler to a healthy state
  260. md_profiler.stop()