You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

profiler.cpp 7.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. /**
  2. * \file src/plugin/impl/profiler.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/plugin/profiler.h"
  12. #include "megbrain/plugin/opr_footprint.h"
  13. #if MGB_ENABLE_JSON
  14. #include "megbrain/graph/event.h"
  15. #include "megbrain/opr/io.h"
  16. #include "megbrain/system.h"
  17. using namespace mgb;
  18. using namespace cg;
  19. MGB_TYPEINFO_OBJ_IMPL(opr_profile::OprProfileHolder);
  20. GraphProfiler::GraphProfiler(cg::ComputingGraph* graph) : PluginBase(graph) {
  21. graph->options()
  22. .user_data.get_user_data_or_create<opr_profile::OprProfileHolder>();
  23. using namespace cg::event;
  24. auto on_seq_start = [this](CompSeqExecBeforeStart const& event) {
  25. m_used_comp_node = event.used_comp_node;
  26. };
  27. auto on_opr_start = [this](OprExecStart const& event) {
  28. ensure_start_time();
  29. if (!opr_filter(event.opr))
  30. return;
  31. OperatorNodeBase* opr = event.opr;
  32. for (auto&& comp_node : get_opr_comp_node_set(event.opr)) {
  33. auto runner = [this, opr, comp_node]() {
  34. MGB_LOCK_GUARD(m_mtx);
  35. auto&& hev = m_host_time[{opr, std::this_thread::get_id()}];
  36. hev.start = m_timer.get_secs();
  37. hev.kern = -1;
  38. record_event(m_kern_event[{opr, comp_node}].start, comp_node);
  39. };
  40. event.env->dispatch_on_comp_node(comp_node, runner);
  41. }
  42. };
  43. auto on_opr_finish = [this](OprExecFinished const& event) {
  44. OperatorNodeBase* opr = event.opr;
  45. if (!opr_filter(opr))
  46. return;
  47. for (auto&& comp_node : get_opr_comp_node_set(event.opr)) {
  48. auto runner = [this, opr]() {
  49. MGB_LOCK_GUARD(m_mtx);
  50. m_host_time[{opr, std::this_thread::get_id()}].end =
  51. m_timer.get_secs();
  52. };
  53. event.env->dispatch_on_comp_node(comp_node, runner);
  54. }
  55. };
  56. auto on_before_kern = [this](BeforeKernel const& event) {
  57. if (!opr_filter(event.opr))
  58. return;
  59. auto footprint = m_opr_footprint_ptr->calc_footprint(event.opr);
  60. CompNodeEventPtr* evptr;
  61. {
  62. MGB_LOCK_GUARD(m_mtx);
  63. m_opr_fp_rst.emplace(event.opr, footprint);
  64. auto&& hev = m_host_time[{event.opr, std::this_thread::get_id()}];
  65. if (hev.kern == -1) {
  66. hev.kern = m_timer.get_secs();
  67. }
  68. evptr = &m_kern_event[{event.opr, event.comp_node}].kern;
  69. }
  70. record_event(*evptr, event.comp_node);
  71. };
  72. auto on_after_kern = [this](AfterKernel const& event) {
  73. if (!opr_filter(event.opr))
  74. return;
  75. CompNodeEventPtr* evptr;
  76. {
  77. MGB_LOCK_GUARD(m_mtx);
  78. evptr = &m_kern_event[{event.opr, event.comp_node}].end;
  79. }
  80. record_event(*evptr, event.comp_node);
  81. };
  82. auto on_graph_compile = [this](const CompSeqOrderDetermined&) {
  83. // clear status after graph recompilation
  84. m_host_time.clear();
  85. m_kern_event.clear();
  86. m_opr_fp_rst.clear();
  87. m_start_of_time = None;
  88. };
  89. auto&& ev = graph->event();
  90. add_event_handler(
  91. ev.register_receiver<CompSeqExecBeforeStart>(on_seq_start));
  92. add_event_handler(ev.register_receiver<OprExecStart>(on_opr_start));
  93. add_event_handler(ev.register_receiver<OprExecFinished>(on_opr_finish));
  94. add_event_handler(ev.register_receiver<BeforeKernel>(on_before_kern));
  95. add_event_handler(ev.register_receiver<AfterKernel>(on_after_kern));
  96. add_event_handler(
  97. ev.register_receiver<CompSeqOrderDetermined>(on_graph_compile));
  98. }
  99. GraphProfiler::~GraphProfiler() noexcept {
  100. auto wait = [](const CompNodeEventPtr& ev) {
  101. if (ev)
  102. ev->host_wait();
  103. };
  104. for (auto&& i : m_kern_event) {
  105. wait(i.second.start);
  106. wait(i.second.kern);
  107. wait(i.second.end);
  108. }
  109. m_owner_graph->options()
  110. .user_data.pop_user_data<opr_profile::OprProfileHolder>();
  111. }
  112. void GraphProfiler::ensure_start_time() {
  113. if (!m_start_of_time.valid()) {
  114. // set up for the first time
  115. m_start_of_time =
  116. CompNode::UnorderedMap<std::unique_ptr<CompNode::Event>>();
  117. for (auto i: *m_used_comp_node) {
  118. i.sync();
  119. auto&& event = m_start_of_time.val()[i];
  120. event = i.create_event(CompNode::Event::NEED_TIMER);
  121. event->record();
  122. }
  123. }
  124. }
  125. void GraphProfiler::record_event(CompNodeEventPtr& dest, CompNode comp_node) {
  126. if (!dest)
  127. dest = comp_node.create_event(CompNode::Event::NEED_TIMER);
  128. dest->record();
  129. }
  130. bool GraphProfiler::opr_filter(cg::OperatorNodeBase* opr) {
  131. static bool only_wait = MGB_GETENV("MGB_PROFILE_ONLY_WAIT");
  132. if (!only_wait)
  133. return true;
  134. if (!opr->input_waiting_spec().empty())
  135. return true;
  136. auto type = opr->dyn_typeinfo();
  137. return type == opr::Copy::typeinfo() ||
  138. type == opr::Host2DeviceCopy::typeinfo();
  139. }
  140. std::shared_ptr<json::Object> GraphProfiler::to_json() const {
  141. using namespace json;
  142. auto dev_prof = Object::make();
  143. auto visit_json_obj = [](Object& obj, const std::string& key) -> Object& {
  144. auto&& v = obj[key];
  145. if (!v)
  146. v = Object::make();
  147. return *static_cast<Object*>(v.get());
  148. };
  149. for (auto&& kern_ev : m_kern_event) {
  150. auto&& opr_prof =
  151. visit_json_obj(*dev_prof, kern_ev.first.first->id_str());
  152. auto comp_node = kern_ev.first.second;
  153. auto&& event = kern_ev.second;
  154. auto&& start = m_start_of_time->at(comp_node);
  155. event.end->host_wait();
  156. opr_prof[comp_node.to_string()] = Object::make({
  157. {"start",
  158. Number::make(start->elapsed_time_until(*event.start))},
  159. {"kern", Number::make(start->elapsed_time_until(*event.kern))},
  160. {"end", Number::make(start->elapsed_time_until(*event.end))},
  161. });
  162. }
  163. auto host_prof = Object::make();
  164. for (auto&& tpair : m_host_time) {
  165. auto&& opr_prof =
  166. visit_json_obj(*host_prof, tpair.first.first->id_str());
  167. auto&& ev = tpair.second;
  168. opr_prof[sys::get_thread_name(tpair.first.second)] =
  169. Object::make({{"start", Number::make(ev.start)},
  170. {"kern", Number::make(ev.kern)},
  171. {"end", Number::make(ev.end)}});
  172. }
  173. auto opr_fp = Object::make();
  174. for (auto&& tpair : m_opr_fp_rst) {
  175. auto&& opr_fp_item = *static_cast<Object*>(opr_fp.get());
  176. opr_fp_item[tpair.first->id_str()] = tpair.second.to_json();
  177. }
  178. auto pf_holder_pair =
  179. m_owner_graph->options()
  180. .user_data.get_user_data<opr_profile::OprProfileHolder>();
  181. mgb_assert(pf_holder_pair.second, "UserData OprProfileHolder not exist.");
  182. auto opr_internal_pf = Object::make();
  183. if ((pf_holder_pair.first[0]->id2object_map).size()) {
  184. for (auto&& pf_pair : pf_holder_pair.first[0]->id2object_map) {
  185. auto&& opr_itnl_pf_item =
  186. *static_cast<Object*>(opr_internal_pf.get());
  187. opr_itnl_pf_item[pf_pair.first->id_str()] = pf_pair.second;
  188. }
  189. }
  190. return Object::make({{"device", dev_prof},
  191. {"host", host_prof},
  192. {"opr_footprint", opr_fp},
  193. {"opr_internal_pf", opr_internal_pf}});
  194. }
  195. #endif // MGB_ENABLE_JSON
  196. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台