You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

memory_chunk.cpp 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. #include <map>
  2. #include <vector>
  3. #include <array>
  4. #include "megbrain/imperative/utils/to_string.h"
  5. #include "megbrain/utils/debug.h"
  6. #include "./formats.h"
  7. #include "./states.h"
  8. #include "./events.h"
  9. namespace mgb::imperative::profiler {
  10. class XMLWriter {
  11. private:
  12. std::vector<std::vector<std::string>> elements;
  13. public:
  14. struct ElementGuard {
  15. XMLWriter* writer;
  16. std::string name;
  17. std::vector<std::pair<std::string, std::string>> attrs;
  18. template <typename T>
  19. ElementGuard& attr(std::string key, T&& value) {
  20. attrs.push_back({key, mgb::imperative::to_string(value)});
  21. return *this;
  22. }
  23. std::string to_string_start() const {
  24. std::string builder;
  25. builder.append(ssprintf("<%s",
  26. name.c_str()));
  27. for (auto&& [k, v]: attrs) {
  28. builder.append(ssprintf(" %s=\"%s\"", k.c_str(), v.c_str()));
  29. }
  30. builder.append(">\n");
  31. return builder;
  32. }
  33. std::string to_string_end() const {
  34. return ssprintf("</%s>\n", name.c_str());
  35. }
  36. ElementGuard(XMLWriter* writer, std::string name): writer{writer}, name{name} {
  37. writer->elements.emplace_back();
  38. }
  39. ~ElementGuard() {
  40. auto children = std::move(writer->elements.back());
  41. writer->elements.pop_back();
  42. std::string builder;
  43. builder.append(to_string_start());
  44. for (auto&& child: children) {
  45. builder.append(child);
  46. }
  47. builder.append(to_string_end());
  48. writer->elements.back().push_back(builder);
  49. }
  50. };
  51. XMLWriter() {
  52. elements.emplace_back().push_back("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  53. }
  54. ElementGuard element(std::string tag) {
  55. return ElementGuard{this, tag};
  56. }
  57. void text(std::string text) {
  58. elements.back().push_back(text);
  59. }
  60. void doctype(std::string element, std::string dtd, std::vector<std::string> args) {
  61. std::string builder = ssprintf("<!DOCTYPE %s %s", element.c_str(), dtd.c_str());
  62. for (auto&& arg: args) {
  63. builder.append(ssprintf(" %s", arg.c_str()));
  64. }
  65. builder.append(">\n");
  66. elements.back().push_back(builder);
  67. }
  68. std::string to_string() const {
  69. mgb_assert(elements.size() == 1 && elements[0].size() >= 1);
  70. std::string builder;
  71. for (auto&& element: elements[0]) {
  72. builder.append(element);
  73. }
  74. return builder;
  75. }
  76. };
  77. struct MemoryChunk {
  78. std::array<uintptr_t, 2> address;
  79. std::string name;
  80. TensorLayout layout;
  81. std::array<uint64_t, 2> time;
  82. bool empty() const {
  83. return address[1] - address[0] == 0;
  84. }
  85. };
  86. struct MemoryFlow {
  87. std::unordered_map<uint64_t, MemoryChunk> chunks;
  88. std::pair<uintptr_t, uintptr_t> address_range() const {
  89. auto addr_begin = std::numeric_limits<uintptr_t>::max();
  90. auto addr_end = std::numeric_limits<uintptr_t>::min();
  91. for(auto&& [id, chunk]: chunks) {
  92. if (chunk.empty()) continue;
  93. addr_begin = std::min(addr_begin, chunk.address[0]);
  94. addr_end = std::max(addr_end, chunk.address[1]);
  95. }
  96. return {addr_begin, addr_end};
  97. }
  98. std::pair<uint64_t, uint64_t> time_range() const {
  99. auto time_begin = std::numeric_limits<uint64_t>::max();
  100. auto time_end = std::numeric_limits<uint64_t>::min();
  101. for(auto&& [id, chunk]: chunks) {
  102. if (chunk.empty()) continue;
  103. time_begin = std::min(time_begin, chunk.time[0]);
  104. time_end = std::max(time_end, chunk.time[1]);
  105. }
  106. return {time_begin, time_end};
  107. }
  108. std::shared_ptr<json::Array> to_json() const {
  109. auto results = json::Array::make();
  110. for(auto&& [id, chunk]: chunks) {
  111. if (chunk.empty()) continue;
  112. auto address = json::Array::make();
  113. auto time = json::Array::make();
  114. address->add(json::String::make(std::to_string(chunk.address[0])));
  115. address->add(json::String::make(std::to_string(chunk.address[1])));
  116. time->add(json::String::make(std::to_string(chunk.time[0])));
  117. time->add(json::String::make(std::to_string(chunk.time[1])));
  118. results->add(json::Object::make({
  119. {"address", address},
  120. {"name", json::String::make(chunk.name)},
  121. {"layout", json::String::make(chunk.layout.to_string())},
  122. {"time", time}
  123. }));
  124. }
  125. return results;
  126. }
  127. XMLWriter to_svg() const {
  128. XMLWriter writer;
  129. auto&& [addr_begin, addr_end] = address_range();
  130. auto&& [time_begin, time_end] = time_range();
  131. writer.doctype("svg", "PUBLIC", {
  132. "\"-//W3C//DTD SVG 1.1//EN\"",
  133. "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\""
  134. });
  135. auto svg = writer.element("svg");
  136. svg.attr("xmlns", std::string{"http://www.w3.org/2000/svg"});
  137. svg.attr("xmlns:tag", std::string{"https://megengine.org.cn"});
  138. double time_scale = 1e5;
  139. double addr_scale = 1e6;
  140. svg.attr("width", (time_end-time_begin)/time_scale);
  141. svg.attr("height", (addr_end-addr_begin)/addr_scale);
  142. {
  143. auto rect = writer.element("rect");
  144. rect.attr("x", 0);
  145. rect.attr("y", 0);
  146. rect.attr("width", (time_end-time_begin)/time_scale);
  147. rect.attr("height", (addr_end-addr_begin)/addr_scale);
  148. rect.attr("fill", std::string{"blue"});
  149. }
  150. double us = 1e3, ms = 1e6;
  151. std::map<double, std::string> time2color = {
  152. {0 * us, "#DDDDDD"},
  153. {100 * us, "#CCCCCC"},
  154. {1 * ms, "#BBBBBB"},
  155. {10 * ms, "#AAAAAA"},
  156. {100 * ms, "#999999"},
  157. {1000 * ms, "#888888"},
  158. {std::numeric_limits<double>::infinity(), "#555555"},
  159. };
  160. auto time2str = [](uint64_t ns){
  161. using pair_t = std::pair<uint64_t, const char*>;
  162. static pair_t units[] = {
  163. {1, "ns "},
  164. {1e3, "us "},
  165. {1e6, "ms "},
  166. {1e9, "s "},
  167. };
  168. std::string builder;
  169. auto comparator = [](const pair_t& lhs, const pair_t& rhs) {
  170. return lhs.first < rhs.first;
  171. };
  172. while (ns > 0) {
  173. auto iter = std::upper_bound(std::begin(units), std::end(units), std::make_pair(ns, ""), comparator) - 1;
  174. builder += std::to_string(ns / iter->first) + iter->second;
  175. ns = ns % iter->first;
  176. }
  177. return builder;
  178. };
  179. auto size2str = [](size_t sz){
  180. using pair_t = std::pair<size_t, const char*>;
  181. static pair_t units[] = {
  182. {1, "B "},
  183. {1024, "KB "},
  184. {1024*1024, "MB "},
  185. {1024*1024*1024, "GB "},
  186. };
  187. std::string builder;
  188. auto comparator = [](const pair_t& lhs, const pair_t& rhs) {
  189. return lhs.first < rhs.first;
  190. };
  191. while (sz > 0) {
  192. auto iter = std::upper_bound(std::begin(units), std::end(units), std::make_pair(sz, ""), comparator) - 1;
  193. builder += std::to_string(sz / iter->first) + iter->second;
  194. sz = sz % iter->first;
  195. }
  196. return builder;
  197. };
  198. for (auto&& [id, chunk]: chunks) {
  199. if (chunk.empty()) continue;
  200. double left = (chunk.time[0]-time_begin)/time_scale;
  201. double right = (chunk.time[1]-time_begin)/time_scale;
  202. double top = (chunk.address[0]-addr_begin)/addr_scale;
  203. double bottom = (chunk.address[1]-addr_begin)/addr_scale;
  204. double duration = chunk.time[1] - chunk.time[0];
  205. {
  206. auto rect = writer.element("rect");
  207. rect.attr("x", left);
  208. rect.attr("y", top);
  209. rect.attr("height", bottom - top);
  210. rect.attr("width", right - left);
  211. rect.attr("fill", time2color.lower_bound(duration)->second);
  212. auto mge_attr = [&](const char* name, auto&& value) {
  213. rect.attr(ssprintf("tag:%s", name), value);
  214. };
  215. mge_attr("type", std::string("tensor"));
  216. mge_attr("name", chunk.name);
  217. mge_attr("address", ssprintf("%p", reinterpret_cast<void*>(chunk.address[0])));
  218. mge_attr("size", size2str(chunk.address[1] - chunk.address[0]));
  219. mge_attr("layout", chunk.layout.to_string());
  220. mge_attr("produced", time2str(chunk.time[0]));
  221. mge_attr("erased", time2str(chunk.time[1]));
  222. mge_attr("duration", time2str(chunk.time[1] - chunk.time[0]));
  223. }
  224. }
  225. return writer;
  226. }
  227. };
  228. void dump_memory_flow(std::string filename, Profiler::options_t options, Profiler::thread_dict_t thread_dict, Profiler::results_t results) {
  229. MemoryFlow flow;
  230. ProfileDataCollector collector;
  231. ProfileState state;
  232. #define HANDLE_EVENT(type, ...) \
  233. collector.handle<type>([&](uint64_t id, std::thread::id tid, uint64_t time, type event) __VA_ARGS__ );
  234. HANDLE_EVENT(TensorDeclareEvent, {
  235. auto& tensor_state = state.tensors[event.tensor_id] = {};
  236. tensor_state.id = event.tensor_id;
  237. tensor_state.name = event.name;
  238. });
  239. HANDLE_EVENT(TensorProduceEvent, {
  240. auto& tensor_state = state.tensors[event.tensor_id];
  241. tensor_state.device = event.device;
  242. tensor_state.layout = event.layout;
  243. tensor_state.produced = time;
  244. state.tensors_by_size.insert({tensor_state.id, tensor_state.size_in_bytes()});
  245. state.tensors_by_produced.insert({tensor_state.id, tensor_state.produced});
  246. auto& chunk = flow.chunks[event.tensor_id];
  247. uintptr_t address = reinterpret_cast<uintptr_t>(event.ptr);
  248. auto span = event.layout.span();
  249. auto dtype = event.layout.dtype;
  250. // assume dtype is not lowbit
  251. if (!address) {
  252. chunk.address = {0, 0};
  253. } else {
  254. chunk.address = {address+span.low_elem*dtype.size(), address+span.high_elem*dtype.size()};
  255. }
  256. chunk.layout = tensor_state.layout;
  257. chunk.time[0] = time;
  258. chunk.name = tensor_state.name;
  259. });
  260. HANDLE_EVENT(TensorReleaseEvent, {
  261. auto& tensor_state = state.tensors[event.tensor_id];
  262. state.tensors_by_size.erase({tensor_state.id, tensor_state.size_in_bytes()});
  263. state.tensors_by_produced.erase({tensor_state.id, tensor_state.produced});
  264. auto& chunk = flow.chunks[event.tensor_id];
  265. chunk.time[1] = time;
  266. });
  267. HANDLE_EVENT(ScopeEvent, {
  268. state.threads[tid].scope_stack.push_back(event.name);
  269. });
  270. HANDLE_EVENT(ScopeFinishEvent, {
  271. mgb_assert(state.threads[tid].scope_stack.back() == event.name);
  272. state.threads[tid].scope_stack.pop_back();
  273. });
  274. for (auto&& result: results) {
  275. collector(result.second.id, result.first, result.second.time, result.second.data);
  276. }
  277. debug::write_to_file(filename.c_str(), flow.to_svg().to_string());
  278. }
  279. }

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台