You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

profiling_utils.cc 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "device/ascend/profiling/reporter/graph_desc_reporter.h"
  17. #include "device/ascend/profiling/profiling_utils.h"
  18. #include "kernel/kernel.h"
  19. #include "device/ascend/profiling/profiling_manager.h"
  20. #include "session/anf_runtime_algorithm.h"
  21. #include "common/utils.h"
  22. #include "utils/utils.h"
  23. #include "device/ascend/profiling/reporter/task_desc_reporter.h"
  24. #include "utils/context/ms_context.h"
  25. #include "device/ascend/profiling/reporter/point_reporter.h"
  26. namespace mindspore {
  27. namespace device {
  28. namespace ascend {
  29. constexpr uint32_t kMaxProfilingNodeNum = 100;
  30. constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
  31. constexpr char kFpStartNode[] = "PROFILING_FP_START";
  32. constexpr char kBpEndNode[] = "PROFILING_BP_END";
  33. constexpr char kIterEndNode[] = "PROFILING_ITER_END";
  34. // PROFILING_CUSTOM_LOGID_START 3
  35. constexpr uint64_t kProfilingFpStartLogId = 1;
  36. constexpr uint64_t kProfilingBpEndLogId = 2;
  37. constexpr uint64_t kProfilingIterEndLogId = 255;
  38. std::map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
  39. std::map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
  40. std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> ProfilingUtils::graph_point_;
  41. uint32_t ProfilingUtils::custom_node_index_ = 1;
  42. ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) {
  43. MS_LOG(INFO) << "get env start";
  44. custom_node_index_ = 1;
  45. auto &cnode_exec_order = graph_ptr->execution_order();
  46. ProfilingTraceInfo profiling_trace;
  47. profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order);
  48. profiling_trace.trace_bp_end = GetTraceBpEnd(cnode_exec_order);
  49. profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order);
  50. for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) {
  51. std::string env_str = std::string(kCustomNode) + std::to_string(i);
  52. const char *node_full_name = std::getenv(env_str.c_str());
  53. if (node_full_name == nullptr) {
  54. break;
  55. }
  56. MS_LOG(INFO) << "Get profiling node:" << node_full_name;
  57. profiling_trace.trace_custom_node.insert(node_full_name);
  58. }
  59. MS_LOG(INFO) << "get env end";
  60. GetTraceHccl(cnode_exec_order, NOT_NULL(&profiling_trace));
  61. MS_LOG(INFO) << "[profiling]trace_begin:" << profiling_trace.trace_begin
  62. << " trace_bp_end:" << profiling_trace.trace_bp_end
  63. << " trace_netoutput:" << profiling_trace.trace_netoutput;
  64. return profiling_trace;
  65. }
  66. void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
  67. NotNull<ProfilingTraceInfo *> profiling_trace) {
  68. for (const auto &node : cnode_exec_order) {
  69. if (AnfAlgo::IsCommunicationOp(node)) {
  70. MS_EXCEPTION_IF_NULL(node);
  71. profiling_trace->trace_custom_node.insert(node->fullname_with_scope());
  72. MS_LOG(INFO) << "[profiling]Get hccl node:" << node->fullname_with_scope();
  73. }
  74. }
  75. }
  76. std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) {
  77. const char *trace_begin = std::getenv(kFpStartNode);
  78. if (trace_begin != nullptr) {
  79. return std::string(trace_begin);
  80. }
  81. std::string fp_start_str;
  82. std::set<std::string> getnext_outputs;
  83. GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs));
  84. if (getnext_outputs.empty()) {
  85. auto first_node = cnode_exec_order.front();
  86. MS_EXCEPTION_IF_NULL(first_node);
  87. fp_start_str = first_node->fullname_with_scope();
  88. } else {
  89. for (auto &cnode : cnode_exec_order) {
  90. if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) {
  91. fp_start_str = cnode->fullname_with_scope();
  92. break;
  93. }
  94. }
  95. }
  96. return fp_start_str;
  97. }
  98. void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
  99. NotNull<std::set<std::string> *> getnext_outputs) {
  100. for (const auto &cnode : cnode_exec_order) {
  101. MS_EXCEPTION_IF_NULL(cnode);
  102. for (const auto &input : cnode->inputs()) {
  103. auto prev_cnode = AnfAlgo::VisitKernel(input, 0);
  104. if (!prev_cnode.first->isa<CNode>()) {
  105. continue;
  106. }
  107. if (AnfAlgo::GetCNodeName(prev_cnode.first) == node_name) {
  108. getnext_outputs->insert(cnode->fullname_with_scope());
  109. MS_LOG(INFO) << "Find GetNext Output CNode:" << cnode->fullname_with_scope();
  110. }
  111. }
  112. }
  113. if (getnext_outputs->empty()) {
  114. MS_LOG(WARNING) << "GetNext not found";
  115. }
  116. }
  117. std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order) {
  118. const char *trace_bp_end = std::getenv(kBpEndNode);
  119. if (trace_bp_end != nullptr) {
  120. return std::string(trace_bp_end);
  121. }
  122. std::string bp_end_str;
  123. // Contain hccl kernel
  124. auto iter = cnode_exec_order.rbegin();
  125. while (iter != cnode_exec_order.rend()) {
  126. if (AnfAlgo::IsCommunicationOp(*iter)) {
  127. // store communication op input nodes' name
  128. std::set<std::string> ar_input_node_names;
  129. for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(*iter); ++i) {
  130. auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i);
  131. auto input_node = input_node_with_index.first;
  132. ar_input_node_names.insert(input_node->fullname_with_scope());
  133. }
  134. // start from previous node
  135. ++iter;
  136. // find input names in previous node
  137. while (iter != cnode_exec_order.rend()) {
  138. if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) {
  139. bp_end_str = (*iter)->fullname_with_scope();
  140. break;
  141. }
  142. ++iter;
  143. }
  144. break;
  145. }
  146. ++iter;
  147. }
  148. if (bp_end_str.empty()) {
  149. bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order);
  150. }
  151. return bp_end_str;
  152. }
  153. std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
  154. std::string last_tbe_kernel_name;
  155. // find last tbe_kernel
  156. for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) {
  157. if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) {
  158. last_tbe_kernel_name = (*iter)->fullname_with_scope();
  159. break;
  160. }
  161. }
  162. if (last_tbe_kernel_name.empty()) {
  163. MS_LOG(WARNING) << "tbe kernel not found in graph";
  164. }
  165. return last_tbe_kernel_name;
  166. }
  167. std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) {
  168. const char *trace_netoutput = std::getenv(kIterEndNode);
  169. return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput);
  170. }
  171. NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content,
  172. NotNull<session::KernelGraph *> graph_ptr) {
  173. kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder;
  174. selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
  175. selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32});
  176. selected_kernel_builder.SetFusionType(kernel::FusionType::OPAQUE);
  177. selected_kernel_builder.SetProcessor(kernel::Processor::AICORE);
  178. selected_kernel_builder.SetKernelType(KernelType::RT_KERNEL);
  179. abstract::AbstractBasePtr type_none_abstract = std::make_shared<abstract::AbstractNone>();
  180. auto primitive = std::make_shared<Primitive>(ProfilingUtils::kProfiling);
  181. std::vector<AnfNodePtr> inputs;
  182. inputs.emplace_back(NewValueNode(primitive));
  183. CNodePtr cnode_ptr = graph_ptr->NewCNode(inputs);
  184. MS_EXCEPTION_IF_NULL(cnode_ptr);
  185. AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_builder.Build(), cnode_ptr.get());
  186. cnode_ptr->set_abstract(type_none_abstract);
  187. // set attr
  188. ValuePtr notify_value = MakeValue(profiling_content.notify);
  189. ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id);
  190. ValuePtr flags_value = MakeValue(profiling_content.flags);
  191. AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr);
  192. AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr);
  193. AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr);
  194. return NOT_NULL(cnode_ptr);
  195. }
  196. void ProfilingUtils::SaveProfilingPoint(uint32_t graph_id, const std::string &node_name, uint32_t point_id) {
  197. std::shared_ptr<ProfDesc> prof_desc_ptr = std::make_shared<PointDesc>(node_name, point_id);
  198. auto iter = graph_point_.find(graph_id);
  199. if (iter == graph_point_.end()) {
  200. std::vector<std::shared_ptr<ProfDesc>> tmp_vect = {prof_desc_ptr};
  201. graph_point_.insert({graph_id, tmp_vect});
  202. } else {
  203. iter->second.emplace_back(prof_desc_ptr);
  204. }
  205. }
  206. void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node,
  207. const ProfilingTraceInfo &profiling_trace_info,
  208. NotNull<session::KernelGraph *> graph_ptr,
  209. NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
  210. if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
  211. MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin;
  212. ProfilingTraceJobId(anf_node, graph_ptr, kernel_list);
  213. ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0};
  214. auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr);
  215. kernel_list->emplace_back(fp_profiling_node);
  216. // insert ProfDesc
  217. SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), kProfilingFpStartLogId);
  218. }
  219. }
  220. void ProfilingUtils::ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull<session::KernelGraph *> graph_ptr,
  221. NotNull<std::vector<CNodePtr> *> kernel_list) {
  222. MS_LOG(INFO) << "Profiling Match start";
  223. auto job_id = ProfilingManager::GetInstance().GetJobId();
  224. ProfilingContent job_profiling_context = {false, job_id, 0};
  225. auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
  226. kernel_list->emplace_back(job_profiling_node);
  227. }
  228. CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
  229. const ProfilingContent &profiling_content,
  230. NotNull<session::KernelGraph *> graph_ptr) {
  231. CNodePtr profiling_node = CreateProfilingCNode(profiling_content, graph_ptr);
  232. AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), profiling_node.get());
  233. AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), profiling_node.get());
  234. return profiling_node;
  235. }
  236. void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  237. NotNull<session::KernelGraph *> graph_ptr,
  238. NotNull<std::vector<CNodePtr> *> kernel_list) {
  239. MS_EXCEPTION_IF_NULL(anf_node);
  240. auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope());
  241. if (iter == profiling_trace_info.trace_custom_node.end()) {
  242. return;
  243. }
  244. MS_LOG(INFO) << "Profiling Match CustomOp:" << anf_node->fullname_with_scope();
  245. // custom op profiling job start from 3.
  246. auto custom_point_id = 2 * custom_node_index_ + 1;
  247. ProfilingContent front_profiling_content = {false, custom_point_id, 0};
  248. CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr);
  249. kernel_list->insert(kernel_list->end() - 1, front_node);
  250. SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id);
  251. ProfilingContent back_profiling_content = {false, custom_point_id + 1, 0};
  252. CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr);
  253. kernel_list->insert(kernel_list->end(), back_node);
  254. SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), custom_point_id + 1);
  255. ++custom_node_index_;
  256. }
  257. void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  258. NotNull<session::KernelGraph *> graph_ptr,
  259. NotNull<std::vector<CNodePtr> *> kernel_list) {
  260. MS_EXCEPTION_IF_NULL(anf_node);
  261. if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) {
  262. MS_LOG(INFO) << "Profiling Match BpEnd:" << profiling_trace_info.trace_bp_end;
  263. ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0};
  264. CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
  265. kernel_list->emplace_back(bp_end_node);
  266. SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), kProfilingBpEndLogId);
  267. }
  268. }
  269. void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  270. NotNull<session::KernelGraph *> graph_ptr,
  271. NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
  272. MS_EXCEPTION_IF_NULL(anf_node);
  273. auto full_scope_name = anf_node->fullname_with_scope();
  274. if (profiling_trace_info.trace_netoutput == full_scope_name) {
  275. MS_LOG(INFO) << "Profiling Match IterEnd:" << profiling_trace_info.trace_netoutput;
  276. ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0};
  277. CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
  278. kernel_list->emplace_back(bp_kernel_ptr);
  279. SaveProfilingPoint(graph_ptr->graph_id(), anf_node->fullname_with_scope(), kProfilingIterEndLogId);
  280. }
  281. }
  282. void ProfilingUtils::SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names) {
  283. auto ret = graph_kernel_name_.try_emplace(graph_id, kernel_names);
  284. if (!ret.second) {
  285. MS_LOG(ERROR) << "[profiling]graph " << graph_id << " kernel names already exist";
  286. }
  287. }
  288. void ProfilingUtils::SetGraphProfilingCNode(uint32_t graph_id, const std::vector<CNodePtr> &profiling_cnode_list) {
  289. auto ret = graph_profiling_cnode_.try_emplace(graph_id, profiling_cnode_list);
  290. if (!ret.second) {
  291. MS_LOG(ERROR) << "[profiling]graph " << graph_id << " profiling cnode list already exist";
  292. }
  293. }
  294. bool ProfilingUtils::ValidComputeGraph(NotNull<const session::KernelGraph *> graph_ptr) {
  295. for (const auto &node : graph_ptr->execution_order()) {
  296. if (AnfAlgo::GetKernelType(node) == TBE_KERNEL) {
  297. return true;
  298. }
  299. }
  300. return false;
  301. }
  302. void ProfilingUtils::ReportProfilingData(const std::vector<uint32_t> &task_ids, const std::vector<uint32_t> &stream_ids,
  303. NotNull<const session::KernelGraph *> graph) {
  304. if (!ValidComputeGraph(graph)) {
  305. MS_LOG(WARNING) << "Not a valid compute graph:" << graph->graph_id();
  306. return;
  307. }
  308. auto ret = graph_profiling_cnode_.find(graph->graph_id());
  309. if (ret == graph_profiling_cnode_.end()) {
  310. MS_LOG(ERROR) << "Graph id not found";
  311. return;
  312. }
  313. auto context = MsContext::GetInstance();
  314. MS_EXCEPTION_IF_NULL(context);
  315. TaskDescReporter task_reporter(context->device_id(), "vm.task_desc_info", ret->second);
  316. task_reporter.set_task_ids(task_ids);
  317. task_reporter.set_stream_ids(stream_ids);
  318. task_reporter.ReportData();
  319. GraphDescReporter graph_reporter(context->device_id(), "vm.graph_desc_info", ret->second);
  320. graph_profiling_cnode_.erase(ret);
  321. graph_reporter.ReportData();
  322. // Report profiling point
  323. auto point_iter = graph_point_.find(graph->graph_id());
  324. if (point_iter == graph_point_.end()) {
  325. MS_LOG(ERROR) << "Graph id not found in graph_point";
  326. return;
  327. }
  328. PointReporter point_reporter(context->device_id(), "vm.point");
  329. for (const auto &point : point_iter->second) {
  330. point_reporter.AddReportData(point);
  331. }
  332. point_reporter.ReportData();
  333. }
  334. } // namespace ascend
  335. } // namespace device
  336. } // namespace mindspore