You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; can include dashes ('-') and can be up to 35 characters long.

test_profiling.py 18 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Testing profiling support in DE
  17. """
  18. import json
  19. import os
  20. import numpy as np
  21. import mindspore.common.dtype as mstype
  22. import mindspore.dataset as ds
  23. import mindspore.dataset.transforms.c_transforms as C
  24. import mindspore.dataset.vision.c_transforms as vision
# TFRecord test data consumed by the pipelines under profiling
FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
DATASET_ROOT = "../data/dataset/testTFTestAllTypes/"
SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
# Profiling output files produced during iteration; the "_1" suffix
# presumably comes from DEVICE_ID/RANK_ID being set to "1" in
# set_profiling_env_var() — TODO confirm against the profiler naming scheme
PIPELINE_FILE = "./pipeline_profiling_1.json"
CPU_UTIL_FILE = "./minddata_cpu_utilization_1.json"
DATASET_ITERATOR_FILE = "./dataset_iterator_profiling_1.txt"
  31. def set_profiling_env_var():
  32. """
  33. Set the MindData Profiling environment variables
  34. """
  35. os.environ['PROFILING_MODE'] = 'true'
  36. os.environ['MINDDATA_PROFILING_DIR'] = '.'
  37. os.environ['DEVICE_ID'] = '1'
  38. os.environ['RANK_ID'] = '1'
  39. def delete_profiling_files():
  40. """
  41. Delete the MindData profiling files generated from the test.
  42. Also disable the MindData Profiling environment variables.
  43. """
  44. # Delete MindData profiling files
  45. os.remove(PIPELINE_FILE)
  46. os.remove(CPU_UTIL_FILE)
  47. os.remove(DATASET_ITERATOR_FILE)
  48. # Disable MindData Profiling environment variables
  49. del os.environ['PROFILING_MODE']
  50. del os.environ['MINDDATA_PROFILING_DIR']
  51. del os.environ['DEVICE_ID']
  52. del os.environ['RANK_ID']
  53. def confirm_cpuutil(num_pipeline_ops):
  54. """
  55. Confirm CPU utilization JSON file when <num_pipeline_ops> are in the pipeline
  56. """
  57. with open(CPU_UTIL_FILE) as file1:
  58. data = json.load(file1)
  59. op_info = data["op_info"]
  60. # Confirm <num_pipeline_ops>+1 ops in CPU util file (including op_id=-1 for monitor thread)
  61. assert len(op_info) == num_pipeline_ops + 1
def test_profiling_simple_pipeline():
    """
    Test profiling-file generation for a simple pipeline:
    Generator -> Shuffle -> Batch
    """
    set_profiling_env_var()
    source = [(np.array([x]),) for x in range(1024)]
    data1 = ds.GeneratorDataset(source, ["data"])
    data1 = data1.shuffle(64)
    data1 = data1.batch(32)
    # Try output shape type and dataset size and make sure no profiling file is generated
    assert data1.output_shapes() == [[32, 1]]
    assert [str(tp) for tp in data1.output_types()] == ["int64"]
    assert data1.get_dataset_size() == 32
    # Confirm profiling files do not (yet) exist
    assert os.path.exists(PIPELINE_FILE) is False
    assert os.path.exists(CPU_UTIL_FILE) is False
    assert os.path.exists(DATASET_ITERATOR_FILE) is False
    try:
        # Iterating the pipeline is what triggers profiling output
        for _ in data1:
            pass
        # Confirm profiling files now exist
        assert os.path.exists(PIPELINE_FILE) is True
        assert os.path.exists(CPU_UTIL_FILE) is True
        assert os.path.exists(DATASET_ITERATOR_FILE) is True
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_complex_pipeline():
    """
    Test profiling of a two-branch pipeline:
    Generator -> Map ->
                         -> Zip
    TFReader -> Shuffle ->
    """
    set_profiling_env_var()
    source = [(np.array([x]),) for x in range(1024)]
    data1 = ds.GeneratorDataset(source, ["gen"])
    data1 = data1.map(operations=[(lambda x: x + 1)], input_columns=["gen"])
    pattern = DATASET_ROOT + "/test.data"
    data2 = ds.TFRecordDataset(pattern, SCHEMA_FILE, shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(4)
    data3 = ds.zip((data1, data2))
    try:
        for _ in data3:
            pass
        # Inspect the pipeline profiling file: 5 ops expected
        with open(PIPELINE_FILE) as f:
            data = json.load(f)
            op_info = data["op_info"]
            assert len(op_info) == 5
            for i in range(5):
                if op_info[i]["op_type"] != "ZipOp":
                    assert "size" in op_info[i]["metrics"]["output_queue"]
                    assert "length" in op_info[i]["metrics"]["output_queue"]
                    assert "throughput" in op_info[i]["metrics"]["output_queue"]
                else:
                    # Note: Zip is an inline op and hence does not have metrics information
                    assert op_info[i]["metrics"] is None
        # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
        confirm_cpuutil(5)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_inline_ops_pipeline1():
    """
    Test pipeline with inline ops: Concat and EpochCtrl
    Generator ->
                 Concat -> EpochCtrl
    Generator ->
    """
    set_profiling_env_var()

    # In source1 dataset: Number of rows is 3; its values are 0, 1, 2
    def source1():
        for i in range(3):
            yield (np.array([i]),)

    # In source2 dataset: Number of rows is 7; its values are 3, 4, 5 ... 9
    def source2():
        for i in range(3, 10):
            yield (np.array([i]),)

    data1 = ds.GeneratorDataset(source1, ["col1"])
    data2 = ds.GeneratorDataset(source2, ["col1"])
    data3 = data1.concat(data2)
    try:
        num_iter = 0
        # Note: Do not explicitly set num_epochs argument in create_tuple_iterator() call
        # Here i refers to index, d refers to data element
        for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
            num_iter += 1
            t = d
            # Concatenated values are 0..9, so index and value must agree
            assert i == t[0][0]
        assert num_iter == 10
        # Confirm pipeline is created with EpochCtrl op
        with open(PIPELINE_FILE) as f:
            data = json.load(f)
            op_info = data["op_info"]
            assert len(op_info) == 4
            for i in range(4):
                # Note: The following ops are inline ops: Concat, EpochCtrl
                if op_info[i]["op_type"] in ("ConcatOp", "EpochCtrlOp"):
                    # Confirm these inline ops do not have metrics information
                    assert op_info[i]["metrics"] is None
                else:
                    assert "size" in op_info[i]["metrics"]["output_queue"]
                    assert "length" in op_info[i]["metrics"]["output_queue"]
                    assert "throughput" in op_info[i]["metrics"]["output_queue"]
        # Confirm CPU util JSON file content, when 4 ops are in the pipeline JSON file
        confirm_cpuutil(4)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_inline_ops_pipeline2():
    """
    Test pipeline with many inline ops
    Generator -> Rename -> Skip -> Repeat -> Take
    """
    set_profiling_env_var()

    # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9
    def source1():
        for i in range(10):
            yield (np.array([i]),)

    data1 = ds.GeneratorDataset(source1, ["col1"])
    data1 = data1.rename(input_columns=["col1"], output_columns=["newcol1"])
    data1 = data1.skip(2)
    data1 = data1.repeat(2)
    data1 = data1.take(12)
    try:
        for _ in data1:
            pass
        # Inspect the pipeline profiling file: 5 ops expected
        with open(PIPELINE_FILE) as f:
            data = json.load(f)
            op_info = data["op_info"]
            assert len(op_info) == 5
            for i in range(5):
                # Check for these inline ops
                if op_info[i]["op_type"] in ("RenameOp", "RepeatOp", "SkipOp", "TakeOp"):
                    # Confirm these inline ops do not have metrics information
                    assert op_info[i]["metrics"] is None
                else:
                    assert "size" in op_info[i]["metrics"]["output_queue"]
                    assert "length" in op_info[i]["metrics"]["output_queue"]
                    assert "throughput" in op_info[i]["metrics"]["output_queue"]
        # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
        confirm_cpuutil(5)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_sampling_interval():
    """
    Test non-default monitor sampling interval
    """
    set_profiling_env_var()
    # Save the original interval so it can be restored afterwards
    interval_origin = ds.config.get_monitor_sampling_interval()
    ds.config.set_monitor_sampling_interval(30)
    interval = ds.config.get_monitor_sampling_interval()
    assert interval == 30
    source = [(np.array([x]),) for x in range(1024)]
    data1 = ds.GeneratorDataset(source, ["data"])
    data1 = data1.shuffle(64)
    data1 = data1.batch(32)
    try:
        for _ in data1:
            pass
    except Exception as error:
        # Restore the sampling interval before cleanup, even on failure
        ds.config.set_monitor_sampling_interval(interval_origin)
        delete_profiling_files()
        raise error
    else:
        ds.config.set_monitor_sampling_interval(interval_origin)
        delete_profiling_files()
def test_profiling_basic_pipeline():
    """
    Test with this basic pipeline
    Generator -> Map -> Batch -> Repeat -> EpochCtrl
    """
    set_profiling_env_var()

    def source1():
        for i in range(8000):
            yield (np.array([i]),)

    # Create this basic and common pipeline
    # Leaf/Source-Op -> Map -> Batch -> Repeat
    data1 = ds.GeneratorDataset(source1, ["col1"])
    type_cast_op = C.TypeCast(mstype.int32)
    data1 = data1.map(operations=type_cast_op, input_columns="col1")
    data1 = data1.batch(16)
    data1 = data1.repeat(2)
    try:
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
        for _ in data1.create_dict_iterator(num_epochs=2):
            num_iter += 1
        # 8000 rows / batch 16 * repeat 2 = 1000 batches
        assert num_iter == 1000
        with open(PIPELINE_FILE) as f:
            data = json.load(f)
            op_info = data["op_info"]
            assert len(op_info) == 5
            for i in range(5):
                # Check for inline ops
                if op_info[i]["op_type"] in ("EpochCtrlOp", "RepeatOp"):
                    # Confirm these inline ops do not have metrics information
                    assert op_info[i]["metrics"] is None
                else:
                    assert "size" in op_info[i]["metrics"]["output_queue"]
                    assert "length" in op_info[i]["metrics"]["output_queue"]
                    assert "throughput" in op_info[i]["metrics"]["output_queue"]
        # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
        confirm_cpuutil(5)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_cifar10_pipeline():
    """
    Test with this common pipeline with Cifar10
    Cifar10 -> Map -> Map -> Batch -> Repeat
    """
    set_profiling_env_var()
    # Create this common pipeline
    # Cifar10 -> Map -> Map -> Batch -> Repeat
    DATA_DIR_10 = "../data/dataset/testCifar10Data"
    data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=8000)
    type_cast_op = C.TypeCast(mstype.int32)
    data1 = data1.map(operations=type_cast_op, input_columns="label")
    random_horizontal_op = vision.RandomHorizontalFlip()
    data1 = data1.map(operations=random_horizontal_op, input_columns="image")
    data1 = data1.batch(32)
    data1 = data1.repeat(3)
    try:
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs=1, then EpochCtrlOp is NOT added to the pipeline
        for _ in data1.create_dict_iterator(num_epochs=1):
            num_iter += 1
        # 8000 samples / batch 32 * repeat 3 = 750 batches
        assert num_iter == 750
        with open(PIPELINE_FILE) as f:
            data = json.load(f)
            op_info = data["op_info"]
            assert len(op_info) == 5
            for i in range(5):
                # Check for inline ops
                if op_info[i]["op_type"] == "RepeatOp":
                    # Confirm these inline ops do not have metrics information
                    assert op_info[i]["metrics"] is None
                else:
                    assert "size" in op_info[i]["metrics"]["output_queue"]
                    assert "length" in op_info[i]["metrics"]["output_queue"]
                    assert "throughput" in op_info[i]["metrics"]["output_queue"]
        # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file
        confirm_cpuutil(5)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
  321. def confirm_3ops_in_pipeline():
  322. with open(PIPELINE_FILE) as file1:
  323. data = json.load(file1)
  324. op_info = data["op_info"]
  325. # Confirm 3 ops in pipeline file
  326. assert len(op_info) == 3
  327. for i in range(3):
  328. assert op_info[i]["op_type"] in ("GeneratorOp", "BatchOp", "EpochCtrlOp")
  329. def confirm_2ops_in_pipeline():
  330. with open(PIPELINE_FILE) as file1:
  331. data = json.load(file1)
  332. op_info = data["op_info"]
  333. # Confirm 2 ops in pipeline file
  334. assert len(op_info) == 2
  335. for i in range(2):
  336. assert op_info[i]["op_type"] in ("GeneratorOp", "BatchOp")
def test_profiling_seq_pipelines_epochctrl3():
    """
    Test with these 2 sequential pipelines:
    1) Generator -> Batch -> EpochCtrl
    2) Generator -> Batch
    Note: This is a simplification of the user scenario to use the same pipeline for training and then evaluation.
    """
    set_profiling_env_var()
    source = [(np.array([x]),) for x in range(64)]
    data1 = ds.GeneratorDataset(source, ["data"])
    data1 = data1.batch(32)
    try:
        # Test A - Call create_dict_iterator with num_epochs>1
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs>1, then EpochCtrlOp is added to the pipeline
        for _ in data1.create_dict_iterator(num_epochs=2):
            num_iter += 1
        assert num_iter == 2
        confirm_3ops_in_pipeline()
        confirm_cpuutil(3)

        # Test B - Call create_dict_iterator with num_epochs=1
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs=1,
        # then EpochCtrlOp should not be NOT added to the pipeline
        for _ in data1.create_dict_iterator(num_epochs=1):
            num_iter += 1
        assert num_iter == 2
        # confirm_2ops_in_pipeline()
        # MD BUG: Confirm pipeline file is not changed and wrongly still has 3 ops
        confirm_3ops_in_pipeline()
        # Confirm CPU util file has correct number of ops
        confirm_cpuutil(2)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_seq_pipelines_epochctrl2():
    """
    Test with these 2 sequential pipelines:
    1) Generator -> Batch
    2) Generator -> Batch -> EpochCtrl
    """
    set_profiling_env_var()
    source = [(np.array([x]),) for x in range(64)]
    data2 = ds.GeneratorDataset(source, ["data"])
    data2 = data2.batch(16)
    try:
        # Test A - Call create_dict_iterator with num_epochs=1
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs=1, then EpochCtrlOp is NOT added to the pipeline
        for _ in data2.create_dict_iterator(num_epochs=1):
            num_iter += 1
        assert num_iter == 4
        confirm_2ops_in_pipeline()
        confirm_cpuutil(2)

        # Test B - Call create_dict_iterator with num_epochs>1
        num_iter = 0
        # Note: If create_tuple_iterator() is called with num_epochs>1,
        # then EpochCtrlOp should be added to the pipeline
        for _ in data2.create_dict_iterator(num_epochs=2):
            num_iter += 1
        assert num_iter == 4
        # confirm_3ops_in_pipeline()
        # MD BUG: Confirm pipeline file is not changed and wrongly still has 2 ops
        confirm_2ops_in_pipeline()
        # Confirm CPU util file has correct number of ops
        confirm_cpuutil(3)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
def test_profiling_seq_pipelines_repeat():
    """
    Test with these 2 sequential pipelines:
    1) Generator -> Batch
    2) Generator -> Batch -> Repeat
    """
    set_profiling_env_var()
    source = [(np.array([x]),) for x in range(64)]
    data2 = ds.GeneratorDataset(source, ["data"])
    data2 = data2.batch(16)
    try:
        # Test A - Call create_dict_iterator with 2 ops in pipeline
        num_iter = 0
        for _ in data2.create_dict_iterator(num_epochs=1):
            num_iter += 1
        assert num_iter == 4
        confirm_2ops_in_pipeline()
        confirm_cpuutil(2)

        # Test B - Add repeat op to pipeline. Call create_dict_iterator with 3 ops in pipeline
        data2 = data2.repeat(5)
        num_iter = 0
        for _ in data2.create_dict_iterator(num_epochs=1):
            num_iter += 1
        assert num_iter == 20
        # confirm_3ops_in_pipeline()
        # MD BUG: Confirm pipeline file is not changed and wrongly still has 2 ops
        confirm_2ops_in_pipeline()
        # Confirm CPU util file has correct number of ops
        confirm_cpuutil(3)
    except Exception as error:
        delete_profiling_files()
        raise error
    else:
        delete_profiling_files()
if __name__ == "__main__":
    # Allow running the full profiling test suite directly, outside of pytest
    test_profiling_simple_pipeline()
    test_profiling_complex_pipeline()
    test_profiling_inline_ops_pipeline1()
    test_profiling_inline_ops_pipeline2()
    test_profiling_sampling_interval()
    test_profiling_basic_pipeline()
    test_profiling_cifar10_pipeline()
    test_profiling_seq_pipelines_epochctrl3()
    test_profiling_seq_pipelines_epochctrl2()
    test_profiling_seq_pipelines_repeat()