You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_profiling.py 24 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. # Copyright 2020-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Testing profiling support in DE
  17. """
  18. import json
  19. import os
  20. import numpy as np
  21. import pytest
  22. import mindspore.common.dtype as mstype
  23. import mindspore.dataset as ds
  24. import mindspore.dataset.transforms.c_transforms as C
  25. import mindspore.dataset.vision.c_transforms as vision
  26. import mindspore._c_dataengine as cde
  27. FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
  28. DATASET_ROOT = "../data/dataset/testTFTestAllTypes/"
  29. SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
  30. # add file name to rank id mapping to avoid file writing crash
  31. file_name_map_rank_id = {"test_profiling_simple_pipeline": "0",
  32. "test_profiling_complex_pipeline": "1",
  33. "test_profiling_inline_ops_pipeline1": "2",
  34. "test_profiling_inline_ops_pipeline2": "3",
  35. "test_profiling_sampling_interval": "4",
  36. "test_profiling_basic_pipeline": "5",
  37. "test_profiling_cifar10_pipeline": "6",
  38. "test_profiling_seq_pipelines_epochctrl3": "7",
  39. "test_profiling_seq_pipelines_epochctrl2": "8",
  40. "test_profiling_seq_pipelines_repeat": "9"}
  41. @pytest.mark.forked
  42. class TestMinddataProfilingManager:
  43. """
  44. Test MinddataProfilingManager
  45. """
  46. def setup_class(self):
  47. """
  48. Run once for the class
  49. """
  50. # Get instance pointer for MindData profiling manager
  51. self.md_profiler = cde.GlobalContext.profiling_manager()
  52. self._pipeline_file = "./pipeline_profiling"
  53. self._cpu_util_file = "./minddata_cpu_utilization"
  54. self._dataset_iterator_file = "./dataset_iterator_profiling"
  55. def setup_method(self):
  56. """
  57. Run before each test function.
  58. """
  59. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  60. file_id = file_name_map_rank_id[file_name]
  61. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  62. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  63. dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
  64. # Confirm MindData Profiling files do not yet exist
  65. assert os.path.exists(pipeline_file) is False
  66. assert os.path.exists(cpu_util_file) is False
  67. assert os.path.exists(dataset_iterator_file) is False
  68. # Set the MindData Profiling related environment variables
  69. os.environ['RANK_ID'] = file_id
  70. os.environ['DEVICE_ID'] = file_id
  71. # Initialize MindData profiling manager
  72. self.md_profiler.init()
  73. # Start MindData Profiling
  74. self.md_profiler.start()
  75. def teardown_method(self):
  76. """
  77. Run after each test function.
  78. """
  79. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  80. file_id = file_name_map_rank_id[file_name]
  81. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  82. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  83. dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
  84. # Delete MindData profiling files generated from the test.
  85. os.remove(pipeline_file)
  86. os.remove(cpu_util_file)
  87. os.remove(dataset_iterator_file)
  88. # Disable MindData Profiling related environment variables
  89. del os.environ['RANK_ID']
  90. del os.environ['DEVICE_ID']
  91. def confirm_cpuutil(self, num_pipeline_ops, cpu_uti_file):
  92. """
  93. Confirm CPU utilization JSON file with <num_pipeline_ops> in the pipeline
  94. """
  95. with open(cpu_uti_file) as file1:
  96. data = json.load(file1)
  97. op_info = data["op_info"]
  98. assert len(op_info) == num_pipeline_ops
  99. def confirm_ops_in_pipeline(self, num_ops, op_list, pipeline_file):
  100. """
  101. Confirm pipeline JSON file with <num_ops> are in the pipeline and the given list of ops
  102. """
  103. with open(pipeline_file) as file1:
  104. data = json.load(file1)
  105. op_info = data["op_info"]
  106. # Confirm ops in pipeline file
  107. assert len(op_info) == num_ops
  108. for i in range(num_ops):
  109. assert op_info[i]["op_type"] in op_list
  110. def test_profiling_simple_pipeline(self):
  111. """
  112. Generator -> Shuffle -> Batch
  113. """
  114. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  115. file_id = file_name_map_rank_id[file_name]
  116. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  117. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  118. dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
  119. source = [(np.array([x]),) for x in range(1024)]
  120. data1 = ds.GeneratorDataset(source, ["data"])
  121. data1 = data1.shuffle(64)
  122. data1 = data1.batch(32)
  123. # try output shape type and dataset size and make sure no profiling file is generated
  124. assert data1.output_shapes() == [[32, 1]]
  125. assert [str(tp) for tp in data1.output_types()] == ["int64"]
  126. assert data1.get_dataset_size() == 32
  127. # Confirm profiling files do not (yet) exist
  128. assert os.path.exists(pipeline_file) is False
  129. assert os.path.exists(cpu_util_file) is False
  130. assert os.path.exists(dataset_iterator_file) is False
  131. for _ in data1:
  132. pass
  133. # Stop MindData Profiling and save output files to current working directory
  134. self.md_profiler.stop()
  135. self.md_profiler.save('./')
  136. # Confirm profiling files now exist
  137. assert os.path.exists(pipeline_file) is True
  138. assert os.path.exists(cpu_util_file) is True
  139. assert os.path.exists(dataset_iterator_file) is True
  140. def test_profiling_complex_pipeline(self):
  141. """
  142. Generator -> Map ->
  143. -> Zip
  144. TFReader -> Shuffle ->
  145. """
  146. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  147. file_id = file_name_map_rank_id[file_name]
  148. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  149. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  150. source = [(np.array([x]),) for x in range(1024)]
  151. data1 = ds.GeneratorDataset(source, ["gen"])
  152. data1 = data1.map(operations=[(lambda x: x + 1)], input_columns=["gen"])
  153. pattern = DATASET_ROOT + "/test.data"
  154. data2 = ds.TFRecordDataset(pattern, SCHEMA_FILE, shuffle=ds.Shuffle.FILES)
  155. data2 = data2.shuffle(4)
  156. data3 = ds.zip((data1, data2))
  157. for _ in data3:
  158. pass
  159. # Stop MindData Profiling and save output files to current working directory
  160. self.md_profiler.stop()
  161. self.md_profiler.save('./')
  162. with open(pipeline_file) as f:
  163. data = json.load(f)
  164. op_info = data["op_info"]
  165. assert len(op_info) == 5
  166. for i in range(5):
  167. if op_info[i]["op_type"] != "ZipOp":
  168. assert "size" in op_info[i]["metrics"]["output_queue"]
  169. assert "length" in op_info[i]["metrics"]["output_queue"]
  170. else:
  171. # Note: Zip is an inline op and hence does not have metrics information
  172. assert op_info[i]["metrics"] is None
  173. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  174. self.confirm_cpuutil(5, cpu_util_file)
  175. def test_profiling_inline_ops_pipeline1(self):
  176. """
  177. Test pipeline with inline ops: Concat and EpochCtrl
  178. Generator ->
  179. Concat -> EpochCtrl
  180. Generator ->
  181. """
  182. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  183. file_id = file_name_map_rank_id[file_name]
  184. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  185. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  186. # In source1 dataset: Number of rows is 3; its values are 0, 1, 2
  187. def source1():
  188. for i in range(3):
  189. yield (np.array([i]),)
  190. # In source2 dataset: Number of rows is 7; its values are 3, 4, 5 ... 9
  191. def source2():
  192. for i in range(3, 10):
  193. yield (np.array([i]),)
  194. data1 = ds.GeneratorDataset(source1, ["col1"])
  195. data2 = ds.GeneratorDataset(source2, ["col1"])
  196. data3 = data1.concat(data2)
  197. num_iter = 0
  198. # Note: set num_epochs=2 in create_tuple_iterator(), so that EpochCtrl op is added to the pipeline
  199. # Here i refers to index, d refers to data element
  200. for i, d in enumerate(data3.create_tuple_iterator(num_epochs=2, output_numpy=True)):
  201. num_iter += 1
  202. t = d
  203. assert i == t[0][0]
  204. assert num_iter == 10
  205. # Stop MindData Profiling and save output files to current working directory
  206. self.md_profiler.stop()
  207. self.md_profiler.save('./')
  208. # Confirm pipeline is created with EpochCtrl op
  209. with open(pipeline_file) as f:
  210. data = json.load(f)
  211. op_info = data["op_info"]
  212. assert len(op_info) == 4
  213. for i in range(4):
  214. # Note: The following ops are inline ops: Concat, EpochCtrl
  215. if op_info[i]["op_type"] in ("ConcatOp", "EpochCtrlOp"):
  216. # Confirm these inline ops do not have metrics information
  217. assert op_info[i]["metrics"] is None
  218. else:
  219. assert "size" in op_info[i]["metrics"]["output_queue"]
  220. assert "length" in op_info[i]["metrics"]["output_queue"]
  221. # Confirm CPU util JSON file content, when 4 ops are in the pipeline JSON file
  222. self.confirm_cpuutil(4, cpu_util_file)
  223. def test_profiling_inline_ops_pipeline2(self):
  224. """
  225. Test pipeline with many inline ops
  226. Generator -> Rename -> Skip -> Repeat -> Take
  227. """
  228. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  229. file_id = file_name_map_rank_id[file_name]
  230. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  231. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  232. # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9
  233. def source1():
  234. for i in range(10):
  235. yield (np.array([i]),)
  236. data1 = ds.GeneratorDataset(source1, ["col1"])
  237. data1 = data1.rename(input_columns=["col1"], output_columns=["newcol1"])
  238. data1 = data1.skip(2)
  239. data1 = data1.repeat(2)
  240. data1 = data1.take(12)
  241. for _ in data1:
  242. pass
  243. # Stop MindData Profiling and save output files to current working directory
  244. self.md_profiler.stop()
  245. self.md_profiler.save('./')
  246. with open(pipeline_file) as f:
  247. data = json.load(f)
  248. op_info = data["op_info"]
  249. assert len(op_info) == 5
  250. for i in range(5):
  251. # Check for these inline ops
  252. if op_info[i]["op_type"] in ("RenameOp", "RepeatOp", "SkipOp", "TakeOp"):
  253. # Confirm these inline ops do not have metrics information
  254. assert op_info[i]["metrics"] is None
  255. else:
  256. assert "size" in op_info[i]["metrics"]["output_queue"]
  257. assert "length" in op_info[i]["metrics"]["output_queue"]
  258. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  259. self.confirm_cpuutil(5, cpu_util_file)
  260. def test_profiling_sampling_interval(self):
  261. """
  262. Test non-default monitor sampling interval
  263. """
  264. interval_origin = ds.config.get_monitor_sampling_interval()
  265. ds.config.set_monitor_sampling_interval(30)
  266. interval = ds.config.get_monitor_sampling_interval()
  267. assert interval == 30
  268. source = [(np.array([x]),) for x in range(1024)]
  269. data1 = ds.GeneratorDataset(source, ["data"])
  270. data1 = data1.shuffle(64)
  271. data1 = data1.batch(32)
  272. for _ in data1:
  273. pass
  274. ds.config.set_monitor_sampling_interval(interval_origin)
  275. # Stop MindData Profiling and save output files to current working directory
  276. self.md_profiler.stop()
  277. self.md_profiler.save('./')
  278. def test_profiling_basic_pipeline(self):
  279. """
  280. Test with this basic pipeline
  281. Generator -> Map -> Batch -> Repeat -> EpochCtrl
  282. """
  283. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  284. file_id = file_name_map_rank_id[file_name]
  285. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  286. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  287. def source1():
  288. for i in range(8000):
  289. yield (np.array([i]),)
  290. # Create this basic and common pipeline
  291. # Leaf/Source-Op -> Map -> Batch -> Repeat
  292. data1 = ds.GeneratorDataset(source1, ["col1"])
  293. type_cast_op = C.TypeCast(mstype.int32)
  294. data1 = data1.map(operations=type_cast_op, input_columns="col1")
  295. data1 = data1.batch(16)
  296. data1 = data1.repeat(2)
  297. num_iter = 0
  298. # Note: If create_dict_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
  299. for _ in data1.create_dict_iterator(num_epochs=2):
  300. num_iter += 1
  301. assert num_iter == 1000
  302. # Stop MindData Profiling and save output files to current working directory
  303. self.md_profiler.stop()
  304. self.md_profiler.save('./')
  305. with open(pipeline_file) as f:
  306. data = json.load(f)
  307. op_info = data["op_info"]
  308. assert len(op_info) == 5
  309. for i in range(5):
  310. # Check for inline ops
  311. if op_info[i]["op_type"] in ("EpochCtrlOp", "RepeatOp"):
  312. # Confirm these inline ops do not have metrics information
  313. assert op_info[i]["metrics"] is None
  314. else:
  315. assert "size" in op_info[i]["metrics"]["output_queue"]
  316. assert "length" in op_info[i]["metrics"]["output_queue"]
  317. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  318. self.confirm_cpuutil(5, cpu_util_file)
  319. def test_profiling_cifar10_pipeline(self):
  320. """
  321. Test with this common pipeline with Cifar10
  322. Cifar10 -> Map -> Map -> Batch -> Repeat
  323. """
  324. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  325. file_id = file_name_map_rank_id[file_name]
  326. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  327. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  328. # Create this common pipeline
  329. # Cifar10 -> Map -> Map -> Batch -> Repeat
  330. DATA_DIR_10 = "../data/dataset/testCifar10Data"
  331. data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=8000)
  332. type_cast_op = C.TypeCast(mstype.int32)
  333. data1 = data1.map(operations=type_cast_op, input_columns="label")
  334. random_horizontal_op = vision.RandomHorizontalFlip()
  335. data1 = data1.map(operations=random_horizontal_op, input_columns="image")
  336. data1 = data1.batch(32)
  337. data1 = data1.repeat(3)
  338. num_iter = 0
  339. # Note: If create_dict_iterator() is called with num_epochs=1, then EpochCtrlOp is NOT added to the pipeline
  340. for _ in data1.create_dict_iterator(num_epochs=1):
  341. num_iter += 1
  342. assert num_iter == 750
  343. # Stop MindData Profiling and save output files to current working directory
  344. self.md_profiler.stop()
  345. self.md_profiler.save('./')
  346. with open(pipeline_file) as f:
  347. data = json.load(f)
  348. op_info = data["op_info"]
  349. assert len(op_info) == 5
  350. for i in range(5):
  351. # Check for inline ops
  352. if op_info[i]["op_type"] == "RepeatOp":
  353. # Confirm these inline ops do not have metrics information
  354. assert op_info[i]["metrics"] is None
  355. else:
  356. assert "size" in op_info[i]["metrics"]["output_queue"]
  357. assert "length" in op_info[i]["metrics"]["output_queue"]
  358. # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
  359. self.confirm_cpuutil(5, cpu_util_file)
  360. def test_profiling_seq_pipelines_epochctrl3(self):
  361. """
  362. Test with these 2 sequential pipelines:
  363. 1) Generator -> Batch -> EpochCtrl
  364. 2) Generator -> Batch
  365. Note: This is a simplification of the user scenario to use the same pipeline for training and then evaluation.
  366. """
  367. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  368. file_id = file_name_map_rank_id[file_name]
  369. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  370. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  371. source = [(np.array([x]),) for x in range(64)]
  372. data1 = ds.GeneratorDataset(source, ["data"])
  373. data1 = data1.batch(32)
  374. # Test A - Call create_dict_iterator with num_epochs>1
  375. num_iter = 0
  376. # Note: If create_dict_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
  377. for _ in data1.create_dict_iterator(num_epochs=2):
  378. num_iter += 1
  379. assert num_iter == 2
  380. # Stop MindData Profiling and save output files to current working directory
  381. self.md_profiler.stop()
  382. self.md_profiler.save('./')
  383. # Confirm pipeline file and CPU util file each have 3 ops
  384. self.confirm_ops_in_pipeline(3, ["GeneratorOp", "BatchOp", "EpochCtrlOp"], pipeline_file)
  385. self.confirm_cpuutil(3, cpu_util_file)
  386. # Test B - Call create_dict_iterator with num_epochs=1
  387. # Initialize and Start MindData profiling manager
  388. self.md_profiler.init()
  389. self.md_profiler.start()
  390. num_iter = 0
  391. # Note: If create_dict_iterator() is called with num_epochs=1,
  392. # then EpochCtrlOp should not be NOT added to the pipeline
  393. for _ in data1.create_dict_iterator(num_epochs=1):
  394. num_iter += 1
  395. assert num_iter == 2
  396. # Stop MindData Profiling and save output files to current working directory
  397. self.md_profiler.stop()
  398. self.md_profiler.save('./')
  399. # Confirm pipeline file and CPU util file each have 2 ops
  400. self.confirm_ops_in_pipeline(2, ["GeneratorOp", "BatchOp"], pipeline_file)
  401. self.confirm_cpuutil(2, cpu_util_file)
  402. def test_profiling_seq_pipelines_epochctrl2(self):
  403. """
  404. Test with these 2 sequential pipelines:
  405. 1) Generator -> Batch
  406. 2) Generator -> Batch -> EpochCtrl
  407. """
  408. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  409. file_id = file_name_map_rank_id[file_name]
  410. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  411. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  412. source = [(np.array([x]),) for x in range(64)]
  413. data2 = ds.GeneratorDataset(source, ["data"])
  414. data2 = data2.batch(16)
  415. # Test A - Call create_dict_iterator with num_epochs=1
  416. num_iter = 0
  417. # Note: If create_dict_iterator() is called with num_epochs=1, then EpochCtrlOp is NOT added to the pipeline
  418. for _ in data2.create_dict_iterator(num_epochs=1):
  419. num_iter += 1
  420. assert num_iter == 4
  421. # Stop MindData Profiling and save output files to current working directory
  422. self.md_profiler.stop()
  423. self.md_profiler.save('./')
  424. # Confirm pipeline file and CPU util file each have 2 ops
  425. self.confirm_ops_in_pipeline(2, ["GeneratorOp", "BatchOp"], pipeline_file)
  426. self.confirm_cpuutil(2, cpu_util_file)
  427. # Test B - Call create_dict_iterator with num_epochs>1
  428. # Initialize and Start MindData profiling manager
  429. self.md_profiler.init()
  430. self.md_profiler.start()
  431. num_iter = 0
  432. # Note: If create_dict_iterator() is called with num_epochs>1,
  433. # then EpochCtrlOp should be added to the pipeline
  434. for _ in data2.create_dict_iterator(num_epochs=2):
  435. num_iter += 1
  436. assert num_iter == 4
  437. # Stop MindData Profiling and save output files to current working directory
  438. self.md_profiler.stop()
  439. self.md_profiler.save('./')
  440. # Confirm pipeline file and CPU util file each have 3 ops
  441. self.confirm_ops_in_pipeline(3, ["GeneratorOp", "BatchOp", "EpochCtrlOp"], pipeline_file)
  442. self.confirm_cpuutil(3, cpu_util_file)
  443. def test_profiling_seq_pipelines_repeat(self):
  444. """
  445. Test with these 2 sequential pipelines:
  446. 1) Generator -> Batch
  447. 2) Generator -> Batch -> Repeat
  448. """
  449. file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
  450. file_id = file_name_map_rank_id[file_name]
  451. pipeline_file = self._pipeline_file + "_" + file_id + ".json"
  452. cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
  453. source = [(np.array([x]),) for x in range(64)]
  454. data2 = ds.GeneratorDataset(source, ["data"])
  455. data2 = data2.batch(16)
  456. # Test A - Call create_dict_iterator with 2 ops in pipeline
  457. num_iter = 0
  458. for _ in data2.create_dict_iterator(num_epochs=1):
  459. num_iter += 1
  460. assert num_iter == 4
  461. # Stop MindData Profiling and save output files to current working directory
  462. self.md_profiler.stop()
  463. self.md_profiler.save('./')
  464. # Confirm pipeline file and CPU util file each have 2 ops
  465. self.confirm_ops_in_pipeline(2, ["GeneratorOp", "BatchOp"], pipeline_file)
  466. self.confirm_cpuutil(2, cpu_util_file)
  467. # Test B - Add repeat op to pipeline. Call create_dict_iterator with 3 ops in pipeline
  468. # Initialize and Start MindData profiling manager
  469. self.md_profiler.init()
  470. self.md_profiler.start()
  471. data2 = data2.repeat(5)
  472. num_iter = 0
  473. for _ in data2.create_dict_iterator(num_epochs=1):
  474. num_iter += 1
  475. assert num_iter == 20
  476. # Stop MindData Profiling and save output files to current working directory
  477. self.md_profiler.stop()
  478. self.md_profiler.save('./')
  479. # Confirm pipeline file and CPU util file each have 3 ops
  480. self.confirm_ops_in_pipeline(3, ["GeneratorOp", "BatchOp", "RepeatOp"], pipeline_file)
  481. self.confirm_cpuutil(3, cpu_util_file)