You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

profiler_test.cc 18 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /**
  2. * Copyright 2021-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <chrono>
  17. #include <thread>
  18. #include "common/common.h"
  19. #include "minddata/dataset/engine/perf/profiling.h"
  20. #include "minddata/dataset/include/dataset/datasets.h"
  21. using namespace mindspore::dataset;
  22. using mindspore::LogStream;
  23. using mindspore::MsLogLevel::INFO;
  24. namespace mindspore {
  25. namespace dataset {
  26. namespace test {
  27. class MindDataTestProfiler : public UT::DatasetOpTesting {
  28. protected:
  29. MindDataTestProfiler() {}
  30. Status DeleteFiles(int file_id = 0) {
  31. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  32. std::string pipeline_file = "./pipeline_profiling_" + std::to_string(file_id) + ".json";
  33. std::string cpu_util_file = "./minddata_cpu_utilization_" + std::to_string(file_id) + ".json";
  34. std::string dataset_iterator_file = "./dataset_iterator_profiling_" + std::to_string(file_id) + ".txt";
  35. if (remove(pipeline_file.c_str()) == 0 && remove(cpu_util_file.c_str()) == 0 &&
  36. remove(dataset_iterator_file.c_str()) == 0) {
  37. return Status::OK();
  38. } else {
  39. RETURN_STATUS_UNEXPECTED("Error deleting profiler files");
  40. }
  41. }
  42. std::shared_ptr<Dataset> set_dataset(int32_t op_input) {
  43. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  44. int64_t num_samples = 20;
  45. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(0, num_samples));
  46. EXPECT_NE(ds, nullptr);
  47. ds = ds->Shuffle(op_input);
  48. EXPECT_NE(ds, nullptr);
  49. // Create objects for the tensor ops
  50. std::shared_ptr<TensorTransform> one_hot = std::make_shared<transforms::OneHot>(op_input);
  51. EXPECT_NE(one_hot, nullptr);
  52. // Create a Map operation, this will automatically add a project after map
  53. ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
  54. EXPECT_NE(ds, nullptr);
  55. ds = ds->Take(op_input);
  56. EXPECT_NE(ds, nullptr);
  57. ds = ds->Batch(op_input, true);
  58. EXPECT_NE(ds, nullptr);
  59. int repeat_num = 10;
  60. ds = ds->Repeat(repeat_num);
  61. EXPECT_NE(ds, nullptr);
  62. return ds;
  63. }
  64. };
  65. /// Feature: MindData Profiling Support
  66. /// Description: Test MindData Profiling with profiling enabled for pipeline with ImageFolder
  67. /// Expectation: Profiling files are created.
  68. TEST_F(MindDataTestProfiler, TestProfilerManager1) {
  69. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManager1.";
  70. // Enable profiler and check
  71. common::SetEnv("RANK_ID", "1");
  72. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  73. EXPECT_OK(profiler_manager->Init());
  74. EXPECT_OK(profiler_manager->Start());
  75. EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  76. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  77. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(0, 2));
  78. EXPECT_NE(ds, nullptr);
  79. ds = ds->Repeat(2);
  80. EXPECT_NE(ds, nullptr);
  81. ds = ds->Shuffle(4);
  82. EXPECT_NE(ds, nullptr);
  83. // Create objects for the tensor ops
  84. std::shared_ptr<TensorTransform> one_hot = std::make_shared<transforms::OneHot>(10);
  85. EXPECT_NE(one_hot, nullptr);
  86. // Create a Map operation, this will automatically add a project after map
  87. ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
  88. EXPECT_NE(ds, nullptr);
  89. ds = ds->Take(4);
  90. EXPECT_NE(ds, nullptr);
  91. ds = ds->Batch(2, true);
  92. EXPECT_NE(ds, nullptr);
  93. // No columns are specified, use all columns
  94. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  95. EXPECT_NE(iter, nullptr);
  96. // Iterate the dataset and get each row
  97. std::vector<mindspore::MSTensor> row;
  98. ASSERT_OK(iter->GetNextRow(&row));
  99. uint64_t i = 0;
  100. while (row.size() != 0) {
  101. ASSERT_OK(iter->GetNextRow(&row));
  102. i++;
  103. }
  104. EXPECT_EQ(i, 2);
  105. // Manually terminate the pipeline
  106. iter->Stop();
  107. // Stop MindData Profiling and save output files to current working directory
  108. EXPECT_OK(profiler_manager->Stop());
  109. EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  110. EXPECT_OK(profiler_manager->Save("."));
  111. // File_id is expected to equal RANK_ID
  112. EXPECT_OK(DeleteFiles(1));
  113. }
  114. /// Feature: MindData Profiling Support
  115. /// Description: Test MindData Profiling with profiling enabled for pipeline with Mnist
  116. /// Expectation: Profiling files are created.
  117. TEST_F(MindDataTestProfiler, TestProfilerManager2) {
  118. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManager2.";
  119. // Enable profiler and check
  120. common::SetEnv("RANK_ID", "2");
  121. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  122. EXPECT_OK(profiler_manager->Init());
  123. EXPECT_OK(profiler_manager->Start());
  124. EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  125. // Create a Mnist Dataset
  126. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  127. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<SequentialSampler>(0, 3));
  128. EXPECT_NE(ds, nullptr);
  129. ds = ds->Skip(1);
  130. EXPECT_NE(ds, nullptr);
  131. ds = ds->Repeat(2);
  132. EXPECT_NE(ds, nullptr);
  133. ds = ds->Batch(2, false);
  134. EXPECT_NE(ds, nullptr);
  135. // No columns are specified, use all columns
  136. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  137. EXPECT_NE(iter, nullptr);
  138. // Iterate the dataset and get each row
  139. std::vector<mindspore::MSTensor> row;
  140. ASSERT_OK(iter->GetNextRow(&row));
  141. uint64_t i = 0;
  142. while (row.size() != 0) {
  143. ASSERT_OK(iter->GetNextRow(&row));
  144. i++;
  145. }
  146. EXPECT_EQ(i, 2);
  147. // Manually terminate the pipeline
  148. iter->Stop();
  149. // Stop MindData Profiling and save output files to current working directory
  150. EXPECT_OK(profiler_manager->Stop());
  151. EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  152. EXPECT_OK(profiler_manager->Save("."));
  153. // File_id is expected to equal RANK_ID
  154. EXPECT_OK(DeleteFiles(2));
  155. }
/// Feature: MindData Profiling Support
/// Description: Test MindData Profiling GetByEpoch Methods
/// Expectation: Results are successfully outputted.
TEST_F(MindDataTestProfiler, TestProfilerManagerByEpoch) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManagerByEpoch.";
  // Enable profiler and check; sample the monitor every 10ms so per-epoch data exists
  common::SetEnv("RANK_ID", "2");
  GlobalContext::config_manager()->set_monitor_sampling_interval(10);
  std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  EXPECT_OK(profiler_manager->Init());
  EXPECT_OK(profiler_manager->Start());
  EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  // Shared pipeline: 20 samples -> Shuffle -> Map(OneHot) -> Take(20) -> Batch(20) -> Repeat(10)
  std::shared_ptr<Dataset> ds = set_dataset(20);
  // No columns are specified, use all columns; iterate for 3 epochs
  std::vector<std::string> columns = {};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
  EXPECT_NE(iter, nullptr);
  std::vector<uint8_t> cpu_result;
  std::vector<uint16_t> op_result;
  std::vector<int32_t> connector_result;
  std::vector<int32_t> time_result;
  float_t queue_result;
  // Note: These Get* calls fail since epoch number cannot be 0.
  EXPECT_ERROR(profiler_manager->GetUserCpuUtilByEpoch(0, &cpu_result));
  EXPECT_ERROR(profiler_manager->GetBatchTimeByEpoch(0, &time_result));
  std::vector<mindspore::MSTensor> row;
  for (int i = 0; i < 3; i++) {
    // Iterate the dataset and get each row; an empty row marks the end of an epoch
    ASSERT_OK(iter->GetNextRow(&row));
    while (row.size() != 0) {
      ASSERT_OK(iter->GetNextRow(&row));
    }
  }
  // Check iteration failure after finishing the num_epochs
  EXPECT_ERROR(iter->GetNextRow(&row));
  // Manually terminate the pipeline
  iter->Stop();
  // Query each of the 3 profiled epochs (epoch numbers are 1-based)
  for (int i = 1; i < 4; i++) {
    ASSERT_OK(profiler_manager->GetUserCpuUtilByEpoch(i, &cpu_result));
    ASSERT_OK(profiler_manager->GetUserCpuUtilByEpoch(i - 1, i, &op_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByEpoch(i, &cpu_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByEpoch(i - 1, i, &op_result));
    // Each epoch produces 10 steps (20 samples batched by 20, repeated 10 times),
    // so the per-epoch output size is expected to be 10
    ASSERT_OK(profiler_manager->GetBatchTimeByEpoch(i, &time_result));
    EXPECT_EQ(time_result.size(), 10);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPipelineTimeByEpoch(i, &time_result));
    EXPECT_EQ(time_result.size(), 10);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPushTimeByEpoch(i, &time_result));
    EXPECT_EQ(time_result.size(), 10);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByEpoch(i, &connector_result));
    EXPECT_EQ(connector_result.size(), 10);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorCapacityByEpoch(i, &connector_result));
    EXPECT_EQ(connector_result.size(), 10);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByEpoch(i - 1, i, &connector_result));
    EXPECT_GT(connector_result.size(), 0);  // Connector size is expected to be greater than 0
    connector_result.clear();
    // Empty-queue frequency is a ratio, so it must lie in [0, 1]
    ASSERT_OK(profiler_manager->GetEmptyQueueFrequencyByEpoch(i, &queue_result));
    EXPECT_GE(queue_result, 0);
    EXPECT_LE(queue_result, 1);
  }
  ASSERT_ERROR(profiler_manager->GetUserCpuUtilByEpoch(4, &cpu_result));  // Check there is no epoch 4
  int num = profiler_manager->GetNumOfProfiledEpochs();
  EXPECT_EQ(num, 3);
  // Stop MindData Profiling and save output files to current working directory
  EXPECT_OK(profiler_manager->Stop());
  EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  EXPECT_OK(profiler_manager->Save("."));
  // File_id is expected to equal RANK_ID
  EXPECT_OK(DeleteFiles(2));
}
/// Feature: MindData Profiling Support
/// Description: Test MindData Profiling GetByStep Methods
/// Expectation: Results are successfully outputted.
TEST_F(MindDataTestProfiler, TestProfilerManagerByStep) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManagerByStep.";
  // Enable profiler and check; sample the monitor every 10ms so per-step data exists
  common::SetEnv("RANK_ID", "2");
  GlobalContext::config_manager()->set_monitor_sampling_interval(10);
  std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  EXPECT_OK(profiler_manager->Init());
  EXPECT_OK(profiler_manager->Start());
  EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  // Shared pipeline: 20 samples -> Shuffle -> Map(OneHot) -> Take(20) -> Batch(20) -> Repeat(10)
  std::shared_ptr<Dataset> ds = set_dataset(20);
  // No columns are specified, use all columns; iterate for 3 epochs
  std::vector<std::string> columns = {};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
  EXPECT_NE(iter, nullptr);
  std::vector<uint8_t> cpu_result;
  std::vector<uint16_t> op_result;
  std::vector<int32_t> connector_result;
  std::vector<int32_t> time_result;
  float_t queue_result;
  // Invalid step-range queries are rejected (step numbers are 1-based)
  uint64_t i = 0;
  ASSERT_ERROR(
    profiler_manager->GetUserCpuUtilByStep(i, i, &cpu_result));  // Fail in TimeIntervalForStepRange for start_step = 0
  ASSERT_ERROR(profiler_manager->GetBatchTimeByStep(
    i, i + 2, &time_result));  // Fail in GetRecordEntryFieldValue for end_step > total_steps
  ASSERT_ERROR(profiler_manager->GetPipelineTimeByStep(
    i + 2, i, &time_result));  // Fail in GetRecordEntryFieldValue for start_step > total_steps
  ASSERT_ERROR(profiler_manager->GetPushTimeByStep(
    i + 1, i, &time_result));  // Fail in GetRecordEntryFieldValue for start_step > end_steps
  std::vector<mindspore::MSTensor> row;
  // Note: the loop index below shadows the outer uint64_t i used above
  for (int i = 0; i < 3; i++) {
    // Iterate the dataset and get each row; an empty row marks the end of an epoch
    ASSERT_OK(iter->GetNextRow(&row));
    while (row.size() != 0) {
      ASSERT_OK(iter->GetNextRow(&row));
    }
  }
  // Manually terminate the pipeline
  iter->Stop();
  // There are 3 epochs and 10 steps for each epoch, 3x10=30 steps in total
  for (int i = 1; i < 31; i++) {
    ASSERT_OK(profiler_manager->GetUserCpuUtilByStep(i, i, &cpu_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByStep(i, i, &cpu_result));
    // Step range is 1 for each iteration, so the output size is expected to be 1
    ASSERT_OK(profiler_manager->GetBatchTimeByStep(i, i, &time_result));
    EXPECT_EQ(time_result.size(), 1);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPipelineTimeByStep(i, i, &time_result));
    EXPECT_EQ(time_result.size(), 1);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPushTimeByStep(i, i, &time_result));
    EXPECT_EQ(time_result.size(), 1);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByStep(i, i, &connector_result));
    EXPECT_EQ(connector_result.size(), 1);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorCapacityByStep(i, i, &connector_result));
    EXPECT_EQ(connector_result.size(), 1);
    connector_result.clear();
    // Empty-queue frequency is a ratio, so it must lie in [0, 1]
    ASSERT_OK(profiler_manager->GetEmptyQueueFrequencyByStep(i, i, &queue_result));
    EXPECT_GE(queue_result, 0);
    EXPECT_LE(queue_result, 1);
  }
  // Iterate by op_id
  for (int i = 0; i < 8; i++) {
    ASSERT_OK(profiler_manager->GetUserCpuUtilByStep(i, i+1, i+1, &op_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByStep(i, i+1, i+1, &op_result));
    ASSERT_OK(profiler_manager->GetConnectorSizeByStep(i, i+1, i+1, &connector_result));
    EXPECT_GT(connector_result.size(), 0);  // Connector size is expected to be greater than 0
    connector_result.clear();
  }
  ASSERT_ERROR(profiler_manager->GetUserCpuUtilByStep(8, 9, 9, &op_result));  // Check there is no op_id=8
  int num = profiler_manager->GetNumOfProfiledEpochs();
  EXPECT_EQ(num, 3);
  // Stop MindData Profiling and save output files to current working directory
  EXPECT_OK(profiler_manager->Stop());
  EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  EXPECT_OK(profiler_manager->Save("."));
  // File_id is expected to equal RANK_ID
  EXPECT_OK(DeleteFiles(2));
}
/// Feature: MindData Profiling Support
/// Description: Test MindData Profiling GetByTime Methods
/// Expectation: Results are successfully outputted.
TEST_F(MindDataTestProfiler, TestProfilerManagerByTime) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManagerByTime.";
  // Enable profiler and check; sample the monitor every 10ms so timestamped data exists
  common::SetEnv("RANK_ID", "2");
  GlobalContext::config_manager()->set_monitor_sampling_interval(10);
  std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  EXPECT_OK(profiler_manager->Init());
  EXPECT_OK(profiler_manager->Start());
  EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  // Shared pipeline: 20 samples -> Shuffle -> Map(OneHot) -> Take(20) -> Batch(20) -> Repeat(10)
  std::shared_ptr<Dataset> ds = set_dataset(20);
  // No columns are specified, use all columns; iterate for 5 epochs
  std::vector<std::string> columns = {};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 5);
  EXPECT_NE(iter, nullptr);
  std::vector<uint8_t> cpu_result;
  std::vector<uint16_t> op_result;
  std::vector<int32_t> connector_result;
  std::vector<int32_t> time_result;
  float_t queue_result;
  // Record a millisecond timestamp at the start of each epoch (and one after the
  // last) so that ts[i-1]..ts[i] brackets epoch i for the ByTime queries below
  std::vector<uint64_t> ts = {};
  std::vector<mindspore::MSTensor> row;
  for (int i = 0; i < 5; i++) {
    ts.push_back(ProfilingTime::GetCurMilliSecond());
    // Iterate the dataset and get each row; an empty row marks the end of an epoch
    ASSERT_OK(iter->GetNextRow(&row));
    while (row.size() != 0) {
      ASSERT_OK(iter->GetNextRow(&row));
    }
  }
  ts.push_back(ProfilingTime::GetCurMilliSecond());
  // Manually terminate the pipeline
  iter->Stop();
  // Query each of the 5 epochs by its bracketing timestamp pair
  for (int i = 1; i < 6; i++) {
    uint64_t start_ts = ts[i - 1];
    uint64_t end_ts = ts[i];
    ASSERT_OK(profiler_manager->GetUserCpuUtilByTime(start_ts, end_ts, &cpu_result));
    ASSERT_OK(profiler_manager->GetUserCpuUtilByTime(i - 1, start_ts, end_ts, &op_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByTime(start_ts, end_ts, &cpu_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByTime(i - 1, start_ts, end_ts, &op_result));
    // Exact sample counts depend on timing, so only check the results are non-empty
    ASSERT_OK(profiler_manager->GetBatchTimeByTime(start_ts, end_ts, &time_result));
    EXPECT_GT(time_result.size(), 0);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPipelineTimeByTime(start_ts, end_ts, &time_result));
    EXPECT_GT(time_result.size(), 0);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPushTimeByTime(start_ts, end_ts, &time_result));
    EXPECT_GT(time_result.size(), 0);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByTime(start_ts, end_ts, &connector_result));
    EXPECT_GT(connector_result.size(), 0);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorCapacityByTime(start_ts, end_ts, &connector_result));
    EXPECT_GT(connector_result.size(), 0);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByTime(i - 1, start_ts, end_ts, &connector_result));
    EXPECT_GT(connector_result.size(), 0);  // Connector size is expected to be greater than 0
    connector_result.clear();
    // Empty-queue frequency is a ratio, so it must lie in [0, 1]
    ASSERT_OK(profiler_manager->GetEmptyQueueFrequencyByTime(start_ts, end_ts, &queue_result));
    EXPECT_GE(queue_result, 0);
    EXPECT_LE(queue_result, 1);
  }
  int num = profiler_manager->GetNumOfProfiledEpochs();
  EXPECT_EQ(num, 5);
  // Stop MindData Profiling and save output files to current working directory
  EXPECT_OK(profiler_manager->Stop());
  EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  EXPECT_OK(profiler_manager->Save("."));
  // File_id is expected to equal RANK_ID
  EXPECT_OK(DeleteFiles(2));
}
  387. } // namespace test
  388. } // namespace dataset
  389. } // namespace mindspore