You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

debugger_utils.cc 9.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/debugger/debugger_utils.h"
  17. #include <iostream>
  18. #include <vector>
  19. #include <memory>
  20. #include <string>
  21. #include "debug/anf_ir_utils.h"
  22. #include "debug/debugger/debugger.h"
  23. #include "runtime/device/gpu/gpu_device_address.h"
  24. #include "debug/data_dump/dump_json_parser.h"
  25. #ifdef ENABLE_D
  26. #include "debug/dump_data_builder.h"
  27. #endif
  28. #include "backend/session/anf_runtime_algorithm.h"
  29. #include "backend/kernel_compiler/kernel.h"
  30. #include "debug/data_dump/e2e_dump.h"
  31. #include "utils/config_manager.h"
  32. constexpr int kFailure = 1;
  33. using mindspore::kernel::AddressPtr;
  34. using mindspore::kernel::KernelLaunchInfo;
  35. using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
  36. using KernelGraph = mindspore::session::KernelGraph;
  37. using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
  38. namespace mindspore {
  39. std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
  40. // define a vector containing real output number
  41. std::vector<size_t> real_outputs;
  42. // P.BatchNorm is used for training and inference
  43. // can add the filter list for more operators here....
  44. if (node_name == "BatchNorm") {
  45. MS_LOG(INFO) << "loading node named " << node_name;
  46. (void)real_outputs.insert(real_outputs.end(), {0, 3, 4});
  47. } else {
  48. // by default, TensorLoader will load all outputs
  49. for (size_t j = 0; j < output_size; ++j) {
  50. real_outputs.push_back(j);
  51. }
  52. }
  53. return real_outputs;
  54. }
  55. void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_,
  56. uint32_t root_graph_id) {
  57. // get inputs
  58. auto kernel_inputs = launch_info_->inputs_;
  59. auto input_size = AnfAlgo::GetInputTensorNum(cnode);
  60. for (size_t j = 0; j < input_size; ++j) {
  61. auto input_kernel = cnode->input(j + 1);
  62. std::string input_kernel_name = GetKernelNodeName(input_kernel);
  63. auto addr = kernel_inputs[j];
  64. auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
  65. // For example, this happens with the Depend op
  66. if (type == kMetaTypeNone) {
  67. continue;
  68. }
  69. #ifdef ENABLE_GPU
  70. auto format = kOpFormat_DEFAULT;
  71. auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
  72. string input_tensor_name = input_kernel_name + ':' + "0";
  73. ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
  74. auto ret =
  75. gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true, root_graph_id);
  76. if (!ret) {
  77. MS_LOG(ERROR) << "LoadMemToHost:"
  78. << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
  79. }
  80. #endif
  81. }
  82. }
  83. void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_,
  84. uint32_t root_graph_id) {
  85. // get outputs
  86. auto kernel_outputs = launch_info_->outputs_;
  87. auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
  88. auto node_name = AnfAlgo::GetCNodeName(cnode);
  89. std::string kernel_name = GetKernelNodeName(cnode);
  90. std::vector<size_t> real_outputs = CheckRealOutput(node_name, output_size);
  91. for (size_t j : real_outputs) {
  92. auto addr = kernel_outputs[j];
  93. auto type = AnfAlgo::GetOutputInferDataType(cnode, j);
  94. // For example, this happens with the Depend op
  95. if (type == kMetaTypeNone) {
  96. continue;
  97. }
  98. #ifdef ENABLE_GPU
  99. auto format = kOpFormat_DEFAULT;
  100. auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
  101. string tensor_name = kernel_name + ':' + std::to_string(j);
  102. ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
  103. auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false, root_graph_id);
  104. if (!ret) {
  105. MS_LOG(ERROR) << "LoadMemToHost:"
  106. << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
  107. }
  108. #endif
  109. }
  110. }
  111. bool CheckReadData(const CNodePtr &cnode) {
  112. auto debugger = Debugger::GetInstance();
  113. if (!debugger) {
  114. return false;
  115. }
  116. bool read_data = false;
  117. auto &dump_json_parser = DumpJsonParser::GetInstance();
  118. bool dump_enabled = debugger->DumpDataEnabledIteration();
  119. std::string kernel_name = GetKernelNodeName(cnode);
  120. if (dump_enabled) {
  121. auto dump_mode = dump_json_parser.dump_mode();
  122. // dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list
  123. if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
  124. read_data = true;
  125. }
  126. } else if (debugger->debugger_enabled()) {
  127. read_data = debugger->ReadNodeDataRequired(cnode);
  128. }
  129. return read_data;
  130. }
  131. void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  132. auto debugger = Debugger::GetInstance();
  133. if (!debugger) {
  134. return;
  135. }
  136. auto &dump_json_parser = DumpJsonParser::GetInstance();
  137. bool dump_enabled = debugger->DumpDataEnabledIteration();
  138. auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
  139. MS_EXCEPTION_IF_NULL(kernel_graph);
  140. auto root_graph_id = kernel_graph->root_graph_id();
  141. if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
  142. LoadInputs(cnode, launch_info_, exec_order_, root_graph_id);
  143. }
  144. if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
  145. LoadOutputs(cnode, launch_info_, exec_order_, root_graph_id);
  146. }
  147. // Dump kernel
  148. if (dump_enabled) {
  149. MS_EXCEPTION_IF_NULL(kernel_graph);
  150. auto graph_id = kernel_graph->graph_id();
  151. debugger->DumpSingleNode(cnode, graph_id);
  152. // Clear Dumped data when online debugger is not enabled
  153. if (!debugger->debugger_enabled()) {
  154. debugger->ClearCurrentData();
  155. }
  156. }
  157. // check if the node is last kernel
  158. bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
  159. debugger->PostExecuteNode(cnode, last_kernel);
  160. }
  161. std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
  162. std::string error_info = "";
  163. bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph();
  164. auto debugger = Debugger::GetInstance();
  165. if (debugger->CheckDebuggerDumpEnabled() && sink_mode) {
  166. error_info = "e2e_dump is not supported on GPU with dataset_sink_mode=True. Please set dataset_sink_mode=False";
  167. }
  168. if (debugger->CheckDebuggerEnabled() && sink_mode) {
  169. error_info = "Debugger is not supported with dataset_sink_mode=True. Please set dataset_sink_mode=False";
  170. }
  171. return error_info;
  172. }
  173. #ifdef ENABLE_D
  174. int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size) {
  175. MS_LOG(DEBUG) << "ADX DumpDataCallBack is called";
  176. string file_name = dump_chunk->fileName;
  177. uint32_t isLastChunk = dump_chunk->isLastChunk;
  178. // parse chunk header
  179. auto debugger = Debugger::GetInstance();
  180. MS_EXCEPTION_IF_NULL(debugger);
  181. auto dump_data_build = debugger->LoadDumpDataBuilder(file_name);
  182. if (dump_data_build == nullptr) {
  183. MS_LOG(ERROR) << "Failed to load dump data builder for node " << file_name;
  184. return 0;
  185. }
  186. if (!dump_data_build->CopyDumpChunk(dump_chunk)) {
  187. return 1;
  188. }
  189. if (isLastChunk == 1) {
  190. // construct dump data object
  191. debugger::dump::DumpData dump_data;
  192. std::vector<char> data_buf;
  193. if (!dump_data_build->ConstructDumpData(&dump_data, &data_buf)) {
  194. MS_LOG(ERROR) << "Failed to parse data for node " << file_name;
  195. return 0;
  196. }
  197. // convert and save to files
  198. auto separator = file_name.rfind("/");
  199. auto path_name = file_name.substr(0, separator);
  200. auto file_base_name = file_name.substr(separator + 1);
  201. if (file_base_name.rfind("Opdebug.Node_OpDebug.") == 0) {
  202. // save overflow data
  203. E2eDump::DumpOpDebugToFile(file_name, dump_data, data_buf.data());
  204. } else {
  205. // save tensor data
  206. // generate fully qualified file name
  207. // before: op_type.op_name.task_id.stream_id.timestamp
  208. // after: op_type.op_name_no_scope.task_id.stream_id.timestamp
  209. size_t first_dot = file_base_name.find(".");
  210. size_t second_dot = file_base_name.size();
  211. const int kNumDots = 3;
  212. int nth_dot_from_back = 0;
  213. while (nth_dot_from_back != kNumDots && second_dot != std::string::npos) {
  214. second_dot = file_base_name.rfind(".", second_dot - 1);
  215. nth_dot_from_back++;
  216. }
  217. if (first_dot == std::string::npos || second_dot == std::string::npos) {
  218. MS_LOG(ERROR) << "Failed to generate fully qualified file name for " << file_name;
  219. return 0;
  220. }
  221. auto op_type = file_base_name.substr(0, first_dot);
  222. auto task_stream_timestamp = file_base_name.substr(second_dot);
  223. std::string op_name = dump_data.op_name();
  224. auto op_name_no_scope = GetOpNameWithoutScope(op_name, "/");
  225. E2eDump::DumpTensorToFile(path_name + "/" + op_type + "." + op_name_no_scope + task_stream_timestamp, dump_data,
  226. data_buf.data());
  227. }
  228. debugger->ClearDumpDataBuilder(file_name);
  229. }
  230. return 0;
  231. }
  232. #endif
  233. } // namespace mindspore