You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

test_profiling.py 24 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. # Copyright 2020-2022 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Testing profiling support in DE
  17. """
  18. import json
  19. import os
  20. import numpy as np
  21. import pytest
  22. import mindspore.common.dtype as mstype
  23. import mindspore.dataset as ds
  24. import mindspore.dataset.transforms.c_transforms as C
  25. import mindspore.dataset.vision.c_transforms as vision
  26. import mindspore._c_dataengine as cde
# Shared test-data locations for the TFRecord-based profiling tests below
FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
DATASET_ROOT = "../data/dataset/testTFTestAllTypes/"
SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
  30. @pytest.mark.forked
  31. class TestMinddataProfilingManager:
  32. """
  33. Test MinddataProfilingManager
  34. Note: Use pytest fixture tmp_path to create files within this temporary directory,
  35. which is automatically created for each test and deleted at the end of the test.
  36. """
  37. def setup_class(self):
  38. """
  39. Run once for the class
  40. """
  41. # Get instance pointer for MindData profiling manager
  42. self.md_profiler = cde.GlobalContext.profiling_manager()
  43. def setup_method(self):
  44. """
  45. Run before each test function.
  46. """
  47. # Set the MindData Profiling related environment variables
  48. os.environ['RANK_ID'] = "1"
  49. os.environ['DEVICE_ID'] = "1"
  50. # Initialize MindData profiling manager
  51. self.md_profiler.init()
  52. # Start MindData Profiling
  53. self.md_profiler.start()
  54. def teardown_method(self):
  55. """
  56. Run after each test function.
  57. """
  58. # Disable MindData Profiling related environment variables
  59. del os.environ['RANK_ID']
  60. del os.environ['DEVICE_ID']
  61. def confirm_cpuutil(self, cpu_util_file, num_pipeline_ops):
  62. """
  63. Confirm CPU utilization JSON file with <num_pipeline_ops> in the pipeline
  64. """
  65. with open(cpu_util_file) as file1:
  66. data = json.load(file1)
  67. op_info = data["op_info"]
  68. assert len(op_info) == num_pipeline_ops
  69. # Confirm memory fields exist
  70. assert "pss_mbytes" in data["process_memory_info"]
  71. assert "rss_mbytes" in data["process_memory_info"]
  72. assert "vss_mbytes" in data["process_memory_info"]
  73. assert "available_sys_memory_mbytes" in data["system_memory_info"]
  74. assert "total_sys_memory_mbytes" in data["system_memory_info"]
  75. assert "used_sys_memory_mbytes" in data["system_memory_info"]
  76. # Perform sanity check on memory information
  77. assert data["process_memory_info"]["pss_mbytes"][0] > 0
  78. assert data["process_memory_info"]["rss_mbytes"][0] > 0
  79. assert data["process_memory_info"]["vss_mbytes"][0] > 0
  80. assert data["system_memory_info"]["available_sys_memory_mbytes"][0] > 0
  81. assert data["system_memory_info"]["total_sys_memory_mbytes"][0] > 0
  82. assert data["system_memory_info"]["used_sys_memory_mbytes"][0] > 0
  83. def confirm_ops_in_pipeline(self, pipeline_file, num_ops, op_list):
  84. """
  85. Confirm pipeline JSON file with <num_ops> are in the pipeline and the given list of ops
  86. """
  87. with open(pipeline_file) as file1:
  88. data = json.load(file1)
  89. op_info = data["op_info"]
  90. # Confirm ops in pipeline file
  91. assert len(op_info) == num_ops
  92. for i in range(num_ops):
  93. assert op_info[i]["op_type"] in op_list
  94. def confirm_dataset_iterator_file(self, dataset_iterator_file, num_batches):
  95. """
  96. Confirm dataset iterator file exists with the correct number of rows in the file
  97. """
  98. assert os.path.exists(dataset_iterator_file)
  99. actual_num_lines = sum(1 for _ in open(dataset_iterator_file))
  100. # Confirm there are 4 lines for each batch in the dataset iterator file
  101. assert actual_num_lines == 4 * num_batches
  102. def test_profiling_simple_pipeline(self, tmp_path):
  103. """
  104. Generator -> Shuffle -> Batch
  105. """
  106. source = [(np.array([x]),) for x in range(1024)]
  107. data1 = ds.GeneratorDataset(source, ["data"])
  108. data1 = data1.shuffle(64)
  109. data1 = data1.batch(32)
  110. # Check output shape type and dataset size
  111. assert data1.output_shapes() == [[32, 1]]
  112. assert [str(tp) for tp in data1.output_types()] == ["int64"]
  113. assert data1.get_dataset_size() == 32
  114. # Stop MindData Profiling and save output files to tmp_path
  115. self.md_profiler.stop()
  116. self.md_profiler.save(str(tmp_path))
  117. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  118. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  119. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  120. # Confirm no profiling files are produced (since no MindData pipeline has been executed)
  121. assert os.path.exists(pipeline_file) is False
  122. assert os.path.exists(cpu_util_file) is False
  123. assert os.path.exists(dataset_iterator_file) is False
  124. # Start MindData Profiling
  125. self.md_profiler.start()
  126. # Execute MindData Pipeline
  127. for _ in data1:
  128. pass
  129. # Stop MindData Profiling and save output files to tmp_path
  130. self.md_profiler.stop()
  131. self.md_profiler.save(str(tmp_path))
  132. # Confirm profiling files now exist
  133. assert os.path.exists(pipeline_file) is True
  134. assert os.path.exists(cpu_util_file) is True
  135. assert os.path.exists(dataset_iterator_file) is True
  136. def test_profiling_complex_pipeline(self, tmp_path):
  137. """
  138. Generator -> Map ->
  139. -> Zip
  140. TFReader -> Shuffle ->
  141. """
  142. source = [(np.array([x]),) for x in range(1024)]
  143. data1 = ds.GeneratorDataset(source, ["gen"])
  144. data1 = data1.map(operations=[(lambda x: x + 1)], input_columns=["gen"])
  145. pattern = DATASET_ROOT + "/test.data"
  146. data2 = ds.TFRecordDataset(pattern, SCHEMA_FILE, shuffle=ds.Shuffle.FILES)
  147. data2 = data2.shuffle(4)
  148. data3 = ds.zip((data1, data2))
  149. for _ in data3:
  150. pass
  151. # Stop MindData Profiling and save output files to tmp_path
  152. self.md_profiler.stop()
  153. self.md_profiler.save(str(tmp_path))
  154. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  155. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  156. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  157. with open(pipeline_file) as f:
  158. data = json.load(f)
  159. op_info = data["op_info"]
  160. assert len(op_info) == 5
  161. for i in range(5):
  162. if op_info[i]["op_type"] != "ZipOp":
  163. assert "size" in op_info[i]["metrics"]["output_queue"]
  164. assert "length" in op_info[i]["metrics"]["output_queue"]
  165. else:
  166. # Note: Zip is an inline op and hence does not have metrics information
  167. assert op_info[i]["metrics"] is None
  168. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  169. self.confirm_cpuutil(cpu_util_file, 5)
  170. # Confirm dataset iterator file content
  171. self.confirm_dataset_iterator_file(dataset_iterator_file, 12)
  172. def test_profiling_inline_ops_pipeline1(self, tmp_path):
  173. """
  174. Test pipeline with inline ops: Concat and EpochCtrl
  175. Generator ->
  176. Concat -> EpochCtrl
  177. Generator ->
  178. """
  179. # In source1 dataset: Number of rows is 3; its values are 0, 1, 2
  180. def source1():
  181. for i in range(3):
  182. yield (np.array([i]),)
  183. # In source2 dataset: Number of rows is 7; its values are 3, 4, 5 ... 9
  184. def source2():
  185. for i in range(3, 10):
  186. yield (np.array([i]),)
  187. data1 = ds.GeneratorDataset(source1, ["col1"])
  188. data2 = ds.GeneratorDataset(source2, ["col1"])
  189. data3 = data1.concat(data2)
  190. num_iter = 0
  191. # Note: set num_epochs=2 in create_tuple_iterator(), so that EpochCtrl op is added to the pipeline
  192. # Here i refers to index, d refers to data element
  193. for i, d in enumerate(data3.create_tuple_iterator(num_epochs=2, output_numpy=True)):
  194. num_iter += 1
  195. t = d
  196. assert i == t[0][0]
  197. assert num_iter == 10
  198. # Stop MindData Profiling and save output files to tmp_path
  199. self.md_profiler.stop()
  200. self.md_profiler.save(str(tmp_path))
  201. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  202. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  203. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  204. # Confirm pipeline is created with EpochCtrl op
  205. with open(pipeline_file) as f:
  206. data = json.load(f)
  207. op_info = data["op_info"]
  208. assert len(op_info) == 4
  209. for i in range(4):
  210. # Note: The following ops are inline ops: Concat, EpochCtrl
  211. if op_info[i]["op_type"] in ("ConcatOp", "EpochCtrlOp"):
  212. # Confirm these inline ops do not have metrics information
  213. assert op_info[i]["metrics"] is None
  214. else:
  215. assert "size" in op_info[i]["metrics"]["output_queue"]
  216. assert "length" in op_info[i]["metrics"]["output_queue"]
  217. # Confirm CPU util JSON file content, when 4 ops are in the pipeline JSON file
  218. self.confirm_cpuutil(cpu_util_file, 4)
  219. # Confirm dataset iterator file content
  220. self.confirm_dataset_iterator_file(dataset_iterator_file, 10)
  221. def test_profiling_inline_ops_pipeline2(self, tmp_path):
  222. """
  223. Test pipeline with many inline ops
  224. Generator -> Rename -> Skip -> Repeat -> Take
  225. """
  226. # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9
  227. def source1():
  228. for i in range(10):
  229. yield (np.array([i]),)
  230. data1 = ds.GeneratorDataset(source1, ["col1"])
  231. data1 = data1.rename(input_columns=["col1"], output_columns=["newcol1"])
  232. data1 = data1.skip(2)
  233. data1 = data1.repeat(2)
  234. data1 = data1.take(12)
  235. for _ in data1:
  236. pass
  237. # Stop MindData Profiling and save output files to tmp_path
  238. self.md_profiler.stop()
  239. self.md_profiler.save(str(tmp_path))
  240. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  241. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  242. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  243. with open(pipeline_file) as f:
  244. data = json.load(f)
  245. op_info = data["op_info"]
  246. assert len(op_info) == 5
  247. for i in range(5):
  248. # Check for these inline ops
  249. if op_info[i]["op_type"] in ("RenameOp", "RepeatOp", "SkipOp", "TakeOp"):
  250. # Confirm these inline ops do not have metrics information
  251. assert op_info[i]["metrics"] is None
  252. else:
  253. assert "size" in op_info[i]["metrics"]["output_queue"]
  254. assert "length" in op_info[i]["metrics"]["output_queue"]
  255. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  256. self.confirm_cpuutil(cpu_util_file, 5)
  257. # Confirm dataset iterator file content
  258. self.confirm_dataset_iterator_file(dataset_iterator_file, 12)
  259. def test_profiling_sampling_interval(self, tmp_path):
  260. """
  261. Test non-default monitor sampling interval
  262. """
  263. interval_origin = ds.config.get_monitor_sampling_interval()
  264. ds.config.set_monitor_sampling_interval(30)
  265. interval = ds.config.get_monitor_sampling_interval()
  266. assert interval == 30
  267. source = [(np.array([x]),) for x in range(1024)]
  268. data1 = ds.GeneratorDataset(source, ["data"])
  269. data1 = data1.shuffle(64)
  270. data1 = data1.batch(32)
  271. for _ in data1:
  272. pass
  273. ds.config.set_monitor_sampling_interval(interval_origin)
  274. # Stop MindData Profiling and save output files to tmp_path
  275. self.md_profiler.stop()
  276. self.md_profiler.save(str(tmp_path))
  277. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  278. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  279. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  280. # Confirm pipeline file and CPU util file each have 3 ops
  281. self.confirm_ops_in_pipeline(pipeline_file, 3, ["GeneratorOp", "BatchOp", "ShuffleOp"])
  282. self.confirm_cpuutil(cpu_util_file, 3)
  283. # Confirm dataset iterator file content
  284. self.confirm_dataset_iterator_file(dataset_iterator_file, 32)
  285. def test_profiling_basic_pipeline(self, tmp_path):
  286. """
  287. Test with this basic pipeline
  288. Generator -> Map -> Batch -> Repeat -> EpochCtrl
  289. """
  290. def source1():
  291. for i in range(8000):
  292. yield (np.array([i]),)
  293. # Create this basic and common pipeline
  294. # Leaf/Source-Op -> Map -> Batch -> Repeat
  295. data1 = ds.GeneratorDataset(source1, ["col1"])
  296. type_cast_op = C.TypeCast(mstype.int32)
  297. data1 = data1.map(operations=type_cast_op, input_columns="col1")
  298. data1 = data1.batch(16)
  299. data1 = data1.repeat(2)
  300. num_iter = 0
  301. # Note: If create_dict_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
  302. for _ in data1.create_dict_iterator(num_epochs=2):
  303. num_iter += 1
  304. assert num_iter == 1000
  305. # Stop MindData Profiling and save output files to tmp_path
  306. self.md_profiler.stop()
  307. self.md_profiler.save(str(tmp_path))
  308. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  309. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  310. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  311. with open(pipeline_file) as f:
  312. data = json.load(f)
  313. op_info = data["op_info"]
  314. assert len(op_info) == 5
  315. for i in range(5):
  316. # Check for inline ops
  317. if op_info[i]["op_type"] in ("EpochCtrlOp", "RepeatOp"):
  318. # Confirm these inline ops do not have metrics information
  319. assert op_info[i]["metrics"] is None
  320. else:
  321. assert "size" in op_info[i]["metrics"]["output_queue"]
  322. assert "length" in op_info[i]["metrics"]["output_queue"]
  323. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  324. self.confirm_cpuutil(cpu_util_file, 5)
  325. # Confirm dataset iterator file content
  326. self.confirm_dataset_iterator_file(dataset_iterator_file, 1000)
  327. def test_profiling_cifar10_pipeline(self, tmp_path):
  328. """
  329. Test with this common pipeline with Cifar10
  330. Cifar10 -> Map -> Map -> Batch -> Repeat
  331. """
  332. # Create this common pipeline
  333. # Cifar10 -> Map -> Map -> Batch -> Repeat
  334. DATA_DIR_10 = "../data/dataset/testCifar10Data"
  335. data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=8000)
  336. type_cast_op = C.TypeCast(mstype.int32)
  337. data1 = data1.map(operations=type_cast_op, input_columns="label")
  338. random_horizontal_op = vision.RandomHorizontalFlip()
  339. data1 = data1.map(operations=random_horizontal_op, input_columns="image")
  340. data1 = data1.batch(32)
  341. data1 = data1.repeat(3)
  342. num_iter = 0
  343. # Note: If create_dict_iterator() is called with num_epochs=1, then EpochCtrlOp is NOT added to the pipeline
  344. for _ in data1.create_dict_iterator(num_epochs=1):
  345. num_iter += 1
  346. assert num_iter == 750
  347. # Stop MindData Profiling and save output files to tmp_path
  348. self.md_profiler.stop()
  349. self.md_profiler.save(str(tmp_path))
  350. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  351. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  352. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  353. with open(pipeline_file) as f:
  354. data = json.load(f)
  355. op_info = data["op_info"]
  356. assert len(op_info) == 5
  357. for i in range(5):
  358. # Check for inline ops
  359. if op_info[i]["op_type"] == "RepeatOp":
  360. # Confirm these inline ops do not have metrics information
  361. assert op_info[i]["metrics"] is None
  362. else:
  363. assert "size" in op_info[i]["metrics"]["output_queue"]
  364. assert "length" in op_info[i]["metrics"]["output_queue"]
  365. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  366. self.confirm_cpuutil(cpu_util_file, 5)
  367. # Confirm dataset iterator file content
  368. self.confirm_dataset_iterator_file(dataset_iterator_file, 750)
  369. def test_profiling_seq_pipelines_epochctrl3(self, tmp_path):
  370. """
  371. Test with these 2 sequential pipelines:
  372. 1) Generator -> Batch -> EpochCtrl
  373. 2) Generator -> Batch
  374. Note: This is a simplification of the user scenario to use the same pipeline for training and then evaluation.
  375. """
  376. source = [(np.array([x]),) for x in range(64)]
  377. data1 = ds.GeneratorDataset(source, ["data"])
  378. data1 = data1.batch(32)
  379. # Test A - Call create_dict_iterator with num_epochs>1
  380. num_iter = 0
  381. # Note: If create_dict_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
  382. for _ in data1.create_dict_iterator(num_epochs=2):
  383. num_iter += 1
  384. assert num_iter == 2
  385. # Stop MindData Profiling and save output files to tmp_path
  386. self.md_profiler.stop()
  387. self.md_profiler.save(str(tmp_path))
  388. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  389. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  390. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  391. # Confirm pipeline file and CPU util file each have 3 ops
  392. self.confirm_ops_in_pipeline(pipeline_file, 3, ["GeneratorOp", "BatchOp", "EpochCtrlOp"])
  393. self.confirm_cpuutil(cpu_util_file, 3)
  394. # Test B - Call create_dict_iterator with num_epochs=1
  395. # Initialize and Start MindData profiling manager
  396. self.md_profiler.init()
  397. self.md_profiler.start()
  398. num_iter = 0
  399. # Note: If create_dict_iterator() is called with num_epochs=1,
  400. # then EpochCtrlOp should not be NOT added to the pipeline
  401. for _ in data1.create_dict_iterator(num_epochs=1):
  402. num_iter += 1
  403. assert num_iter == 2
  404. # Stop MindData Profiling and save output files to tmp_path
  405. self.md_profiler.stop()
  406. self.md_profiler.save(str(tmp_path))
  407. # Confirm pipeline file and CPU util file each have 2 ops
  408. self.confirm_ops_in_pipeline(pipeline_file, 2, ["GeneratorOp", "BatchOp"])
  409. self.confirm_cpuutil(cpu_util_file, 2)
  410. # Confirm dataset iterator file content
  411. self.confirm_dataset_iterator_file(dataset_iterator_file, 2)
  412. def test_profiling_seq_pipelines_epochctrl2(self, tmp_path):
  413. """
  414. Test with these 2 sequential pipelines:
  415. 1) Generator -> Batch
  416. 2) Generator -> Batch -> EpochCtrl
  417. """
  418. source = [(np.array([x]),) for x in range(64)]
  419. data2 = ds.GeneratorDataset(source, ["data"])
  420. data2 = data2.batch(16)
  421. # Test A - Call create_dict_iterator with num_epochs=1
  422. num_iter = 0
  423. # Note: If create_dict_iterator() is called with num_epochs=1, then EpochCtrlOp is NOT added to the pipeline
  424. for _ in data2.create_dict_iterator(num_epochs=1):
  425. num_iter += 1
  426. assert num_iter == 4
  427. # Stop MindData Profiling and save output files to tmp_path
  428. self.md_profiler.stop()
  429. self.md_profiler.save(str(tmp_path))
  430. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  431. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  432. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  433. # Confirm pipeline file and CPU util file each have 2 ops
  434. self.confirm_ops_in_pipeline(pipeline_file, 2, ["GeneratorOp", "BatchOp"])
  435. self.confirm_cpuutil(cpu_util_file, 2)
  436. # Test B - Call create_dict_iterator with num_epochs>1
  437. # Initialize and Start MindData profiling manager
  438. self.md_profiler.init()
  439. self.md_profiler.start()
  440. num_iter = 0
  441. # Note: If create_dict_iterator() is called with num_epochs>1,
  442. # then EpochCtrlOp should be added to the pipeline
  443. for _ in data2.create_dict_iterator(num_epochs=2):
  444. num_iter += 1
  445. assert num_iter == 4
  446. # Stop MindData Profiling and save output files to tmp_path
  447. self.md_profiler.stop()
  448. self.md_profiler.save(str(tmp_path))
  449. # Confirm pipeline file and CPU util file each have 3 ops
  450. self.confirm_ops_in_pipeline(pipeline_file, 3, ["GeneratorOp", "BatchOp", "EpochCtrlOp"])
  451. self.confirm_cpuutil(cpu_util_file, 3)
  452. # Confirm dataset iterator file content
  453. self.confirm_dataset_iterator_file(dataset_iterator_file, 4)
  454. def test_profiling_seq_pipelines_repeat(self, tmp_path):
  455. """
  456. Test with these 2 sequential pipelines:
  457. 1) Generator -> Batch
  458. 2) Generator -> Batch -> Repeat
  459. """
  460. source = [(np.array([x]),) for x in range(64)]
  461. data2 = ds.GeneratorDataset(source, ["data"])
  462. data2 = data2.batch(16)
  463. # Test A - Call create_dict_iterator with 2 ops in pipeline
  464. num_iter = 0
  465. for _ in data2.create_dict_iterator(num_epochs=1):
  466. num_iter += 1
  467. assert num_iter == 4
  468. # Stop MindData Profiling and save output files to tmp_path
  469. self.md_profiler.stop()
  470. self.md_profiler.save(str(tmp_path))
  471. pipeline_file = str(tmp_path) + "/pipeline_profiling_1.json"
  472. cpu_util_file = str(tmp_path) + "/minddata_cpu_utilization_1.json"
  473. dataset_iterator_file = str(tmp_path) + "/dataset_iterator_profiling_1.txt"
  474. # Confirm pipeline file and CPU util file each have 2 ops
  475. self.confirm_ops_in_pipeline(pipeline_file, 2, ["GeneratorOp", "BatchOp"])
  476. self.confirm_cpuutil(cpu_util_file, 2)
  477. # Test B - Add repeat op to pipeline. Call create_dict_iterator with 3 ops in pipeline
  478. # Initialize and Start MindData profiling manager
  479. self.md_profiler.init()
  480. self.md_profiler.start()
  481. data2 = data2.repeat(5)
  482. num_iter = 0
  483. for _ in data2.create_dict_iterator(num_epochs=1):
  484. num_iter += 1
  485. assert num_iter == 20
  486. # Stop MindData Profiling and save output files to tmp_path
  487. self.md_profiler.stop()
  488. self.md_profiler.save(str(tmp_path))
  489. # Confirm pipeline file and CPU util file each have 3 ops
  490. self.confirm_ops_in_pipeline(pipeline_file, 3, ["GeneratorOp", "BatchOp", "RepeatOp"])
  491. self.confirm_cpuutil(cpu_util_file, 3)
  492. # Confirm dataset iterator file content
  493. self.confirm_dataset_iterator_file(dataset_iterator_file, 20)