You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

profiler_test.cc 18 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /**
  2. * Copyright 2021-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <chrono>
  17. #include <thread>
  18. #include "common/common.h"
  19. #include "minddata/dataset/engine/perf/profiling.h"
  20. #include "minddata/dataset/include/dataset/datasets.h"
  21. using namespace mindspore::dataset;
  22. using mindspore::LogStream;
  23. using mindspore::MsLogLevel::INFO;
  24. namespace mindspore {
  25. namespace dataset {
  26. namespace test {
  27. class MindDataTestProfiler : public UT::DatasetOpTesting {
  28. protected:
  29. MindDataTestProfiler() {}
  30. Status DeleteFiles(int file_id = 0) {
  31. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  32. std::string pipeline_file = "./pipeline_profiling_" + std::to_string(file_id) + ".json";
  33. std::string cpu_util_file = "./minddata_cpu_utilization_" + std::to_string(file_id) + ".json";
  34. std::string dataset_iterator_file = "./dataset_iterator_profiling_" + std::to_string(file_id) + ".txt";
  35. if (remove(pipeline_file.c_str()) == 0 && remove(cpu_util_file.c_str()) == 0 &&
  36. remove(dataset_iterator_file.c_str()) == 0) {
  37. return Status::OK();
  38. } else {
  39. RETURN_STATUS_UNEXPECTED("Error deleting profiler files");
  40. }
  41. }
  42. std::shared_ptr<Dataset> set_dataset(int32_t op_input) {
  43. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  44. int64_t num_samples = 2;
  45. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(0, num_samples));
  46. EXPECT_NE(ds, nullptr);
  47. ds = ds->Repeat(op_input);
  48. EXPECT_NE(ds, nullptr);
  49. ds = ds->Shuffle(op_input);
  50. EXPECT_NE(ds, nullptr);
  51. // Create objects for the tensor ops
  52. std::shared_ptr<TensorTransform> one_hot = std::make_shared<transforms::OneHot>(op_input);
  53. EXPECT_NE(one_hot, nullptr);
  54. // Create a Map operation, this will automatically add a project after map
  55. ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
  56. EXPECT_NE(ds, nullptr);
  57. ds = ds->Take(op_input);
  58. EXPECT_NE(ds, nullptr);
  59. ds = ds->Batch(op_input, true);
  60. EXPECT_NE(ds, nullptr);
  61. ds = ds->Repeat(op_input);
  62. EXPECT_NE(ds, nullptr);
  63. return ds;
  64. }
  65. };
  66. /// Feature: MindData Profiling Support
  67. /// Description: Test MindData Profiling with profiling enabled for pipeline with ImageFolder
  68. /// Expectation: Profiling files are created.
  69. TEST_F(MindDataTestProfiler, TestProfilerManager1) {
  70. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManager1.";
  71. // Enable profiler and check
  72. common::SetEnv("RANK_ID", "1");
  73. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  74. EXPECT_OK(profiler_manager->Init());
  75. EXPECT_OK(profiler_manager->Start());
  76. EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  77. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  78. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(0, 2));
  79. EXPECT_NE(ds, nullptr);
  80. ds = ds->Repeat(2);
  81. EXPECT_NE(ds, nullptr);
  82. ds = ds->Shuffle(4);
  83. EXPECT_NE(ds, nullptr);
  84. // Create objects for the tensor ops
  85. std::shared_ptr<TensorTransform> one_hot = std::make_shared<transforms::OneHot>(10);
  86. EXPECT_NE(one_hot, nullptr);
  87. // Create a Map operation, this will automatically add a project after map
  88. ds = ds->Map({one_hot}, {"label"}, {"label"}, {"label"});
  89. EXPECT_NE(ds, nullptr);
  90. ds = ds->Take(4);
  91. EXPECT_NE(ds, nullptr);
  92. ds = ds->Batch(2, true);
  93. EXPECT_NE(ds, nullptr);
  94. // No columns are specified, use all columns
  95. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  96. EXPECT_NE(iter, nullptr);
  97. // Iterate the dataset and get each row
  98. std::vector<mindspore::MSTensor> row;
  99. ASSERT_OK(iter->GetNextRow(&row));
  100. uint64_t i = 0;
  101. while (row.size() != 0) {
  102. ASSERT_OK(iter->GetNextRow(&row));
  103. i++;
  104. }
  105. EXPECT_EQ(i, 2);
  106. // Manually terminate the pipeline
  107. iter->Stop();
  108. // Stop MindData Profiling and save output files to current working directory
  109. EXPECT_OK(profiler_manager->Stop());
  110. EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  111. EXPECT_OK(profiler_manager->Save("."));
  112. // File_id is expected to equal RANK_ID
  113. EXPECT_OK(DeleteFiles(1));
  114. }
  115. /// Feature: MindData Profiling Support
  116. /// Description: Test MindData Profiling with profiling enabled for pipeline with Mnist
  117. /// Expectation: Profiling files are created.
  118. TEST_F(MindDataTestProfiler, TestProfilerManager2) {
  119. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManager2.";
  120. // Enable profiler and check
  121. common::SetEnv("RANK_ID", "2");
  122. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  123. EXPECT_OK(profiler_manager->Init());
  124. EXPECT_OK(profiler_manager->Start());
  125. EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  126. // Create a Mnist Dataset
  127. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  128. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<SequentialSampler>(0, 3));
  129. EXPECT_NE(ds, nullptr);
  130. ds = ds->Skip(1);
  131. EXPECT_NE(ds, nullptr);
  132. ds = ds->Repeat(2);
  133. EXPECT_NE(ds, nullptr);
  134. ds = ds->Batch(2, false);
  135. EXPECT_NE(ds, nullptr);
  136. // No columns are specified, use all columns
  137. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  138. EXPECT_NE(iter, nullptr);
  139. // Iterate the dataset and get each row
  140. std::vector<mindspore::MSTensor> row;
  141. ASSERT_OK(iter->GetNextRow(&row));
  142. uint64_t i = 0;
  143. while (row.size() != 0) {
  144. ASSERT_OK(iter->GetNextRow(&row));
  145. i++;
  146. }
  147. EXPECT_EQ(i, 2);
  148. // Manually terminate the pipeline
  149. iter->Stop();
  150. // Stop MindData Profiling and save output files to current working directory
  151. EXPECT_OK(profiler_manager->Stop());
  152. EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  153. EXPECT_OK(profiler_manager->Save("."));
  154. // File_id is expected to equal RANK_ID
  155. EXPECT_OK(DeleteFiles(2));
  156. }
  157. /// Feature: MindData Profiling Support
  158. /// Description: Test MindData Profiling GetByEpoch Methods
  159. /// Expectation: Results are successfully outputted.
// Exercises every GetBy*Epoch accessor of ProfilingManager: runs a 3-epoch
// pipeline, then queries per-epoch CPU utilization, batch/pipeline/push times,
// connector size/capacity, and empty-queue frequency, checking sizes/ranges.
TEST_F(MindDataTestProfiler, TestProfilerManagerByEpoch) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManagerByEpoch.";
  // Enable profiler and check
  common::SetEnv("RANK_ID", "2");
  // Sample monitor data every 10ms so short epochs still produce samples.
  GlobalContext::config_manager()->set_monitor_sampling_interval(10);
  std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  EXPECT_OK(profiler_manager->Init());
  EXPECT_OK(profiler_manager->Start());
  EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  // Shared fixture pipeline; 20 parameterizes every op (Repeat/Shuffle/Take/Batch/...).
  std::shared_ptr<Dataset> ds = set_dataset(20);
  // No columns are specified, use all columns
  std::vector<std::string> columns = {};
  // num_epochs = 3
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
  EXPECT_NE(iter, nullptr);
  // Output buffers reused across the Get* calls below.
  std::vector<uint8_t> cpu_result;
  std::vector<uint16_t> op_result;
  std::vector<int32_t> connector_result;
  std::vector<int32_t> time_result;
  float_t queue_result;
  // Note: These Get* calls fail since epoch number cannot be 0.
  EXPECT_ERROR(profiler_manager->GetUserCpuUtilByEpoch(0, &cpu_result));
  EXPECT_ERROR(profiler_manager->GetBatchTimeByEpoch(0, &time_result));
  std::vector<mindspore::MSTensor> row;
  // Drain all 3 epochs; each inner while runs one epoch to exhaustion.
  for (int i = 0; i < 3; i++) {
    // Iterate the dataset and get each row
    ASSERT_OK(iter->GetNextRow(&row));
    while (row.size() != 0) {
      ASSERT_OK(iter->GetNextRow(&row));
    }
  }
  // Check iteration failure after finishing the num_epochs
  EXPECT_ERROR(iter->GetNextRow(&row));
  // Manually terminate the pipeline
  iter->Stop();
  // Query each profiled epoch (1-based: epochs 1..3).
  // NOTE(review): the 2-arg overloads take (epoch, out); the 3-arg overloads
  // presumably take (op_id, epoch, out) — confirm against ProfilingManager.
  for (int i = 1; i < 4; i++) {
    ASSERT_OK(profiler_manager->GetUserCpuUtilByEpoch(i, &cpu_result));
    ASSERT_OK(profiler_manager->GetUserCpuUtilByEpoch(i - 1, i, &op_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByEpoch(i, &cpu_result));
    ASSERT_OK(profiler_manager->GetSysCpuUtilByEpoch(i - 1, i, &op_result));
    // Epoch is 1 for each iteration and 20 steps for each epoch, so the output size are expected to be 20
    ASSERT_OK(profiler_manager->GetBatchTimeByEpoch(i, &time_result));
    EXPECT_EQ(time_result.size(), 20);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPipelineTimeByEpoch(i, &time_result));
    EXPECT_EQ(time_result.size(), 20);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetPushTimeByEpoch(i, &time_result));
    EXPECT_EQ(time_result.size(), 20);
    time_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByEpoch(i, &connector_result));
    EXPECT_EQ(connector_result.size(), 20);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorCapacityByEpoch(i, &connector_result));
    EXPECT_EQ(connector_result.size(), 20);
    connector_result.clear();
    ASSERT_OK(profiler_manager->GetConnectorSizeByEpoch(i - 1, i, &connector_result));
    EXPECT_GT(connector_result.size(), 0);  // Connector size is expected to be greater than 0
    connector_result.clear();
    // Empty-queue frequency is a ratio, so it must lie in [0, 1].
    ASSERT_OK(profiler_manager->GetEmptyQueueFrequencyByEpoch(i, &queue_result));
    EXPECT_GE(queue_result, 0);
    EXPECT_LE(queue_result, 1);
  }
  ASSERT_ERROR(profiler_manager->GetUserCpuUtilByEpoch(4, &cpu_result));  // Check there is no epoch 4
  int num = profiler_manager->GetNumOfProfiledEpochs();
  EXPECT_EQ(num, 3);
  // Stop MindData Profiling and save output files to current working directory
  EXPECT_OK(profiler_manager->Stop());
  EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  EXPECT_OK(profiler_manager->Save("."));
  // File_id is expected to equal RANK_ID
  EXPECT_OK(DeleteFiles(2));
}
  232. /// Feature: MindData Profiling Support
  233. /// Description: Test MindData Profiling GetByStep Methods
  234. /// Expectation: Results are successfully outputted.
  235. TEST_F(MindDataTestProfiler, TestProfilerManagerByStep) {
  236. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManagerByStep.";
  237. // Enable profiler and check
  238. common::SetEnv("RANK_ID", "2");
  239. GlobalContext::config_manager()->set_monitor_sampling_interval(10);
  240. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  241. EXPECT_OK(profiler_manager->Init());
  242. EXPECT_OK(profiler_manager->Start());
  243. EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  244. std::shared_ptr<Dataset> ds = set_dataset(20);
  245. // No columns are specified, use all columns
  246. std::vector<std::string> columns = {};
  247. std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 3);
  248. EXPECT_NE(iter, nullptr);
  249. std::vector<uint8_t> cpu_result;
  250. std::vector<uint16_t> op_result;
  251. std::vector<int32_t> connector_result;
  252. std::vector<int32_t> time_result;
  253. float_t queue_result;
  254. uint64_t i = 0;
  255. ASSERT_ERROR(
  256. profiler_manager->GetUserCpuUtilByStep(i, i, &cpu_result)); // Fail in TimeIntervalForStepRange for start_step = 0
  257. ASSERT_ERROR(profiler_manager->GetBatchTimeByStep(
  258. i, i + 2, &time_result)); // Fail in GetRecordEntryFieldValue for end_step > total_steps
  259. ASSERT_ERROR(profiler_manager->GetPipelineTimeByStep(
  260. i + 2, i, &time_result)); // Fail in GetRecordEntryFieldValue for start_step > total_steps
  261. ASSERT_ERROR(profiler_manager->GetPushTimeByStep(
  262. i + 1, i, &time_result)); // Fail in GetRecordEntryFieldValue for start_step > end_steps
  263. std::vector<mindspore::MSTensor> row;
  264. for (int i = 0; i < 3; i++) {
  265. // Iterate the dataset and get each row
  266. ASSERT_OK(iter->GetNextRow(&row));
  267. while (row.size() != 0) {
  268. ASSERT_OK(iter->GetNextRow(&row));
  269. }
  270. }
  271. // Manually terminate the pipeline
  272. iter->Stop();
  273. // There are 3 epochs and 3 samplers for each epoch, 3x3=9 steps in total
  274. for (int i = 1; i < 10; i++) {
  275. ASSERT_OK(profiler_manager->GetUserCpuUtilByStep(i, i, &cpu_result));
  276. ASSERT_OK(profiler_manager->GetUserCpuUtilByStep(i - 1, i, i, &op_result));
  277. ASSERT_OK(profiler_manager->GetSysCpuUtilByStep(i, i, &cpu_result));
  278. ASSERT_OK(profiler_manager->GetSysCpuUtilByStep(i - 1, i, i, &op_result));
  279. // Step is 1 for each iteration, so the output size is expected to be 1
  280. ASSERT_OK(profiler_manager->GetBatchTimeByStep(i, i, &time_result));
  281. EXPECT_EQ(time_result.size(), 1);
  282. time_result.clear();
  283. ASSERT_OK(profiler_manager->GetPipelineTimeByStep(i, i, &time_result));
  284. EXPECT_EQ(time_result.size(), 1);
  285. time_result.clear();
  286. ASSERT_OK(profiler_manager->GetPushTimeByStep(i, i, &time_result));
  287. EXPECT_EQ(time_result.size(), 1);
  288. time_result.clear();
  289. ASSERT_OK(profiler_manager->GetConnectorSizeByStep(i, i, &connector_result));
  290. EXPECT_EQ(connector_result.size(), 1);
  291. connector_result.clear();
  292. ASSERT_OK(profiler_manager->GetConnectorCapacityByStep(i, i, &connector_result));
  293. EXPECT_EQ(connector_result.size(), 1);
  294. connector_result.clear();
  295. ASSERT_OK(profiler_manager->GetConnectorSizeByStep(i - 1, i, i, &connector_result));
  296. EXPECT_GT(connector_result.size(), 0); // Connector size is expected to be greater than 0
  297. connector_result.clear();
  298. ASSERT_OK(profiler_manager->GetEmptyQueueFrequencyByStep(i, i, &queue_result));
  299. EXPECT_GE(queue_result, 0);
  300. EXPECT_LE(queue_result, 1);
  301. ASSERT_OK(
  302. profiler_manager->GetEmptyQueueFrequencyByStep(i - 1, i, &queue_result)); // Check when start_step < end_step
  303. }
  304. ASSERT_ERROR(profiler_manager->GetUserCpuUtilByStep(10, 9, 9, &op_result)); // Check there is no op_id=10
  305. int num = profiler_manager->GetNumOfProfiledEpochs();
  306. EXPECT_EQ(num, 3);
  307. // Stop MindData Profiling and save output files to current working directory
  308. EXPECT_OK(profiler_manager->Stop());
  309. EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  310. EXPECT_OK(profiler_manager->Save("."));
  311. // File_id is expected to equal RANK_ID
  312. EXPECT_OK(DeleteFiles(2));
  313. }
  314. /// Feature: MindData Profiling Support
  315. /// Description: Test MindData Profiling GetByTime Methods
  316. /// Expectation: Results are successfully outputted.
  317. TEST_F(MindDataTestProfiler, TestProfilerManagerByTime) {
  318. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProfilerManagerByTime.";
  319. // Enable profiler and check
  320. common::SetEnv("RANK_ID", "2");
  321. GlobalContext::config_manager()->set_monitor_sampling_interval(10);
  322. std::shared_ptr<ProfilingManager> profiler_manager = GlobalContext::profiling_manager();
  323. EXPECT_OK(profiler_manager->Init());
  324. EXPECT_OK(profiler_manager->Start());
  325. EXPECT_TRUE(profiler_manager->IsProfilingEnable());
  326. std::shared_ptr<Dataset> ds = set_dataset(20);
  327. // No columns are specified, use all columns
  328. std::vector<std::string> columns = {};
  329. std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, 5);
  330. EXPECT_NE(iter, nullptr);
  331. std::vector<uint8_t> cpu_result;
  332. std::vector<uint16_t> op_result;
  333. std::vector<int32_t> connector_result;
  334. std::vector<int32_t> time_result;
  335. float_t queue_result;
  336. std::vector<uint64_t> ts = {};
  337. std::vector<mindspore::MSTensor> row;
  338. for (int i = 0; i < 5; i++) {
  339. ts.push_back(ProfilingTime::GetCurMilliSecond());
  340. // Iterate the dataset and get each row
  341. ASSERT_OK(iter->GetNextRow(&row));
  342. while (row.size() != 0) {
  343. ASSERT_OK(iter->GetNextRow(&row));
  344. }
  345. }
  346. ts.push_back(ProfilingTime::GetCurMilliSecond());
  347. // Manually terminate the pipeline
  348. iter->Stop();
  349. for (int i = 1; i < 6; i++) {
  350. uint64_t start_ts = ts[i - 1];
  351. uint64_t end_ts = ts[i];
  352. ASSERT_OK(profiler_manager->GetUserCpuUtilByTime(start_ts, end_ts, &cpu_result));
  353. ASSERT_OK(profiler_manager->GetUserCpuUtilByTime(i - 1, start_ts, end_ts, &op_result));
  354. ASSERT_OK(profiler_manager->GetSysCpuUtilByTime(start_ts, end_ts, &cpu_result));
  355. ASSERT_OK(profiler_manager->GetSysCpuUtilByTime(i - 1, start_ts, end_ts, &op_result));
  356. ASSERT_OK(profiler_manager->GetBatchTimeByTime(start_ts, end_ts, &time_result));
  357. EXPECT_GT(time_result.size(), 0);
  358. time_result.clear();
  359. ASSERT_OK(profiler_manager->GetPipelineTimeByTime(start_ts, end_ts, &time_result));
  360. EXPECT_GT(time_result.size(), 0);
  361. time_result.clear();
  362. ASSERT_OK(profiler_manager->GetPushTimeByTime(start_ts, end_ts, &time_result));
  363. EXPECT_GT(time_result.size(), 0);
  364. time_result.clear();
  365. ASSERT_OK(profiler_manager->GetConnectorSizeByTime(start_ts, end_ts, &connector_result));
  366. EXPECT_GT(connector_result.size(), 0);
  367. connector_result.clear();
  368. ASSERT_OK(profiler_manager->GetConnectorCapacityByTime(start_ts, end_ts, &connector_result));
  369. EXPECT_GT(connector_result.size(), 0);
  370. connector_result.clear();
  371. ASSERT_OK(profiler_manager->GetConnectorSizeByTime(i - 1, start_ts, end_ts, &connector_result));
  372. EXPECT_GT(connector_result.size(), 0); // Connector size is expected to be greater than 0
  373. connector_result.clear();
  374. ASSERT_OK(profiler_manager->GetEmptyQueueFrequencyByTime(start_ts, end_ts, &queue_result));
  375. EXPECT_GE(queue_result, 0);
  376. EXPECT_LE(queue_result, 1);
  377. }
  378. int num = profiler_manager->GetNumOfProfiledEpochs();
  379. EXPECT_EQ(num, 5);
  380. // Stop MindData Profiling and save output files to current working directory
  381. EXPECT_OK(profiler_manager->Stop());
  382. EXPECT_FALSE(profiler_manager->IsProfilingEnable());
  383. EXPECT_OK(profiler_manager->Save("."));
  384. // File_id is expected to equal RANK_ID
  385. EXPECT_OK(DeleteFiles(2));
  386. }
  387. } // namespace test
  388. } // namespace dataset
  389. } // namespace mindspore