You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

profiling_utils.cc 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <map>
  17. #include "device/ascend/profiling/reporter/graph_desc_reporter.h"
  18. #include "device/ascend/profiling/profiling_utils.h"
  19. #include "kernel/kernel.h"
  20. #include "device/ascend/profiling/profiling_manager.h"
  21. #include "session/anf_runtime_algorithm.h"
  22. #include "common/utils.h"
  23. #include "utils/utils.h"
  24. #include "device/ascend/profiling/reporter/task_desc_reporter.h"
  25. #include "utils/context/ms_context.h"
  26. namespace mindspore {
  27. namespace device {
  28. namespace ascend {
  29. constexpr uint32_t kMaxProfilingNodeNum = 100;
  30. constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
  31. constexpr char kFpStartNode[] = "PROFILING_FP_START";
  32. constexpr char kBpEndNode[] = "PROFILING_BP_END";
  33. constexpr char kIterEndNode[] = "PROFILING_ITER_END";
  34. std::unordered_map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
  35. std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
  36. uint32_t ProfilingUtils::custom_node_index_ = 1;
  37. ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) {
  38. MS_LOG(INFO) << "get env start";
  39. custom_node_index_ = 1;
  40. auto &cnode_exec_order = graph_ptr->execution_order();
  41. ProfilingTraceInfo profiling_trace;
  42. profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order);
  43. profiling_trace.trace_bp_end = GetTraceBpEnd(cnode_exec_order);
  44. profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order);
  45. for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) {
  46. std::string env_str = std::string(kCustomNode) + std::to_string(i);
  47. const char *node_full_name = std::getenv(env_str.c_str());
  48. if (node_full_name == nullptr) {
  49. break;
  50. }
  51. MS_LOG(INFO) << "Get profiling node:" << node_full_name;
  52. profiling_trace.trace_custom_node.insert(node_full_name);
  53. }
  54. MS_LOG(INFO) << "get env end";
  55. GetTraceHccl(cnode_exec_order, NOT_NULL(&profiling_trace));
  56. MS_LOG(INFO) << "[profiling]trace_begin:" << profiling_trace.trace_begin
  57. << " trace_bp_end:" << profiling_trace.trace_bp_end
  58. << " trace_netoutput:" << profiling_trace.trace_netoutput;
  59. return profiling_trace;
  60. }
  61. void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
  62. NotNull<ProfilingTraceInfo *> profiling_trace) {
  63. for (const auto &node : cnode_exec_order) {
  64. if (AnfAlgo::IsCommunicationOp(node)) {
  65. MS_EXCEPTION_IF_NULL(node);
  66. profiling_trace->trace_custom_node.insert(node->fullname_with_scope());
  67. MS_LOG(INFO) << "[profiling]Get hccl node:" << node->fullname_with_scope();
  68. }
  69. }
  70. }
  71. std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) {
  72. const char *trace_begin = std::getenv(kFpStartNode);
  73. if (trace_begin != nullptr) {
  74. return std::string(trace_begin);
  75. }
  76. std::string fp_start_str;
  77. std::set<std::string> getnext_outputs;
  78. GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs));
  79. if (getnext_outputs.empty()) {
  80. auto first_node = cnode_exec_order.front();
  81. MS_EXCEPTION_IF_NULL(first_node);
  82. fp_start_str = first_node->fullname_with_scope();
  83. } else {
  84. for (auto &cnode : cnode_exec_order) {
  85. if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) {
  86. fp_start_str = cnode->fullname_with_scope();
  87. break;
  88. }
  89. }
  90. }
  91. return fp_start_str;
  92. }
  93. void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
  94. NotNull<std::set<std::string> *> getnext_outputs) {
  95. for (const auto &cnode : cnode_exec_order) {
  96. for (const auto &input : cnode->inputs()) {
  97. auto prev_cnode = AnfAlgo::VisitKernel(input, 0);
  98. if (!prev_cnode.first->isa<CNode>()) {
  99. continue;
  100. }
  101. if (AnfAlgo::GetCNodeName(prev_cnode.first) == node_name) {
  102. getnext_outputs->insert(cnode->fullname_with_scope());
  103. MS_LOG(INFO) << "Find GetNext Output CNode:" << cnode->fullname_with_scope();
  104. }
  105. }
  106. }
  107. if (getnext_outputs->empty()) {
  108. MS_LOG(WARNING) << "GetNext not found";
  109. }
  110. }
  111. std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order) {
  112. const char *trace_bp_end = std::getenv(kBpEndNode);
  113. if (trace_bp_end != nullptr) {
  114. return std::string(trace_bp_end);
  115. }
  116. std::string bp_end_str;
  117. // Contain hccl kernel
  118. auto iter = cnode_exec_order.rbegin();
  119. while (iter != cnode_exec_order.rend()) {
  120. if (AnfAlgo::IsCommunicationOp(*iter)) {
  121. // store communication op input nodes' name
  122. std::set<std::string> ar_input_node_names;
  123. for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(*iter); ++i) {
  124. auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i);
  125. auto input_node = input_node_with_index.first;
  126. ar_input_node_names.insert(input_node->fullname_with_scope());
  127. }
  128. // start from previous node
  129. ++iter;
  130. // find input names in previous node
  131. while (iter != cnode_exec_order.rend()) {
  132. if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) {
  133. bp_end_str = (*iter)->fullname_with_scope();
  134. break;
  135. }
  136. ++iter;
  137. }
  138. break;
  139. }
  140. ++iter;
  141. }
  142. if (bp_end_str.empty()) {
  143. bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order);
  144. }
  145. return bp_end_str;
  146. }
  147. std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
  148. std::string last_tbe_kernel_name;
  149. // find last tbe_kernel
  150. for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) {
  151. if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) {
  152. last_tbe_kernel_name = (*iter)->fullname_with_scope();
  153. break;
  154. }
  155. }
  156. if (last_tbe_kernel_name.empty()) {
  157. MS_LOG(WARNING) << "tbe kernel not found in graph";
  158. }
  159. return last_tbe_kernel_name;
  160. }
  161. std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) {
  162. const char *trace_netoutput = std::getenv(kIterEndNode);
  163. return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput);
  164. }
  165. NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content,
  166. NotNull<session::KernelGraph *> graph_ptr) {
  167. kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder;
  168. selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
  169. selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32});
  170. selected_kernel_builder.SetFusionType(kernel::FusionType::OPAQUE);
  171. selected_kernel_builder.SetProcessor(kernel::Processor::AICORE);
  172. selected_kernel_builder.SetKernelType(KernelType::RT_KERNEL);
  173. abstract::AbstractBasePtr type_none_abstract = std::make_shared<abstract::AbstractNone>();
  174. auto primitive = std::make_shared<Primitive>(ProfilingUtils::kProfiling);
  175. std::vector<AnfNodePtr> inputs;
  176. inputs.emplace_back(NewValueNode(primitive));
  177. CNodePtr cnode_ptr = graph_ptr->NewCNode(inputs);
  178. MS_EXCEPTION_IF_NULL(cnode_ptr);
  179. AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_builder.Build(), cnode_ptr.get());
  180. cnode_ptr->set_abstract(type_none_abstract);
  181. // set attr
  182. ValuePtr notify_value = MakeValue(profiling_content.notify);
  183. ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id);
  184. ValuePtr flags_value = MakeValue(profiling_content.flags);
  185. AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr);
  186. AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr);
  187. AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr);
  188. return NOT_NULL(cnode_ptr);
  189. }
  190. void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node,
  191. const ProfilingTraceInfo &profiling_trace_info,
  192. NotNull<session::KernelGraph *> graph_ptr,
  193. NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
  194. if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
  195. MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin;
  196. ProfilingTraceJobId(anf_node, graph_ptr, kernel_list);
  197. ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0};
  198. auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr);
  199. kernel_list->emplace_back(fp_profiling_node);
  200. }
  201. }
  202. void ProfilingUtils::ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull<session::KernelGraph *> graph_ptr,
  203. NotNull<std::vector<CNodePtr> *> kernel_list) {
  204. MS_LOG(INFO) << "Profiling Match start";
  205. auto job_id = ProfilingManager::GetInstance().GetJobId();
  206. ProfilingContent job_profiling_context = {false, job_id, 0};
  207. auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
  208. kernel_list->emplace_back(job_profiling_node);
  209. }
  210. CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
  211. const ProfilingContent &profiling_content,
  212. NotNull<session::KernelGraph *> graph_ptr) {
  213. CNodePtr profiling_node = CreateProfilingCNode(profiling_content, graph_ptr);
  214. AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), profiling_node.get());
  215. AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), profiling_node.get());
  216. return profiling_node;
  217. }
  218. void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  219. NotNull<session::KernelGraph *> graph_ptr,
  220. NotNull<std::vector<CNodePtr> *> kernel_list) {
  221. MS_EXCEPTION_IF_NULL(anf_node);
  222. auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope());
  223. if (iter == profiling_trace_info.trace_custom_node.end()) {
  224. return;
  225. }
  226. MS_LOG(INFO) << "Profiling Match CustomOp:" << anf_node->fullname_with_scope();
  227. // custom op profiling job start from 3.
  228. ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0};
  229. CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr);
  230. kernel_list->insert(kernel_list->end() - 1, front_node);
  231. ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0};
  232. CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr);
  233. kernel_list->insert(kernel_list->end(), back_node);
  234. ++custom_node_index_;
  235. }
  236. void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  237. NotNull<session::KernelGraph *> graph_ptr,
  238. NotNull<std::vector<CNodePtr> *> kernel_list) {
  239. MS_EXCEPTION_IF_NULL(anf_node);
  240. if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) {
  241. MS_LOG(INFO) << "Profiling Match BpEnd:" << profiling_trace_info.trace_bp_end;
  242. ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0};
  243. CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
  244. kernel_list->emplace_back(bp_end_node);
  245. }
  246. }
  247. void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  248. NotNull<session::KernelGraph *> graph_ptr,
  249. NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
  250. MS_EXCEPTION_IF_NULL(anf_node);
  251. auto full_scope_name = anf_node->fullname_with_scope();
  252. if (profiling_trace_info.trace_netoutput == full_scope_name) {
  253. MS_LOG(INFO) << "Profiling Match IterEnd:" << profiling_trace_info.trace_netoutput;
  254. ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0};
  255. CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
  256. kernel_list->emplace_back(bp_kernel_ptr);
  257. }
  258. }
  259. void ProfilingUtils::SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names) {
  260. auto ret = graph_kernel_name_.try_emplace(graph_id, kernel_names);
  261. if (!ret.second) {
  262. MS_LOG(ERROR) << "[profiling]graph " << graph_id << " kernel names already exist";
  263. }
  264. }
  265. void ProfilingUtils::SetGraphProfilingCNode(uint32_t graph_id, const std::vector<CNodePtr> &profiling_cnode_list) {
  266. auto ret = graph_profiling_cnode_.try_emplace(graph_id, profiling_cnode_list);
  267. if (!ret.second) {
  268. MS_LOG(ERROR) << "[profiling]graph " << graph_id << " profiling cnode list already exist";
  269. }
  270. }
  271. bool ProfilingUtils::ValidComputeGraph(NotNull<const session::KernelGraph *> graph_ptr) {
  272. for (const auto &node : graph_ptr->execution_order()) {
  273. if (AnfAlgo::GetKernelType(node) == TBE_KERNEL) {
  274. return true;
  275. }
  276. }
  277. return false;
  278. }
  279. void ProfilingUtils::ReportProfilingData(const std::vector<uint32_t> &task_ids,
  280. NotNull<const session::KernelGraph *> graph) {
  281. if (!ValidComputeGraph(graph)) {
  282. MS_LOG(WARNING) << "Not a valid compute graph:" << graph->graph_id();
  283. return;
  284. }
  285. auto ret = graph_profiling_cnode_.find(graph->graph_id());
  286. if (ret == graph_profiling_cnode_.end()) {
  287. MS_LOG(ERROR) << "Graph id not found";
  288. return;
  289. }
  290. auto context = MsContext::GetInstance();
  291. MS_EXCEPTION_IF_NULL(context);
  292. TaskDescReporter task_reporter(context->device_id(), "vm.task_desc_info_" + std::to_string(graph->graph_id()),
  293. ret->second);
  294. task_reporter.set_task_ids(task_ids);
  295. task_reporter.ReportData();
  296. GraphDescReporter graph_reporter(context->device_id(), "vm.graph_desc_info_" + std::to_string(graph->graph_id()),
  297. ret->second);
  298. graph_reporter.ReportData();
  299. }
  300. } // namespace ascend
  301. } // namespace device
  302. } // namespace mindspore