You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

debugger_utils.cc 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/debugger/debugger_utils.h"
  17. #include <iostream>
  18. #include <vector>
  19. #include <memory>
  20. #include <string>
  21. #include "debug/anf_ir_utils.h"
  22. #include "debug/debugger/debugger.h"
  23. #include "runtime/device/gpu/gpu_device_address.h"
  24. #include "debug/data_dump/dump_json_parser.h"
  25. #include "backend/session/anf_runtime_algorithm.h"
  26. #include "backend/kernel_compiler/kernel.h"
  27. using mindspore::kernel::AddressPtr;
  28. using mindspore::kernel::KernelLaunchInfo;
  29. using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
  30. using KernelGraph = mindspore::session::KernelGraph;
  31. using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
  32. namespace mindspore {
  33. static const size_t PARAMETER_OUTPUT_INDEX = 0;
  34. std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
  35. // define a vector containing real output number
  36. std::vector<int> real_outputs;
  37. // P.BatchNorm is used for training and inference
  38. // can add the filter list for more operators here....
  39. if (node_name == "BatchNorm") {
  40. MS_LOG(INFO) << "loading node named " << node_name;
  41. (void)real_outputs.insert(real_outputs.end(), {0, 3, 4});
  42. } else {
  43. // by default, TensorLoader will load all outputs
  44. for (size_t j = 0; j < output_size; ++j) {
  45. size_t index = j;
  46. real_outputs.push_back(index);
  47. }
  48. }
  49. return real_outputs;
  50. }
  51. void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  52. // get inputs
  53. auto kernel_inputs = launch_info_->inputs_;
  54. auto input_size = AnfAlgo::GetInputTensorNum(cnode);
  55. for (size_t j = 0; j < input_size; ++j) {
  56. auto input_kernel = cnode->input(j + 1);
  57. std::string input_kernel_name = GetKernelNodeName(input_kernel);
  58. auto addr = kernel_inputs[j];
  59. auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
  60. // For example, this happens with the Depend op
  61. if (type == kMetaTypeNone) {
  62. continue;
  63. }
  64. #ifdef ENABLE_GPU
  65. auto format = kOpFormat_DEFAULT;
  66. auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
  67. string input_tensor_name = input_kernel_name + ':' + "0";
  68. ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
  69. auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true);
  70. if (!ret) {
  71. MS_LOG(ERROR) << "LoadMemToHost:"
  72. << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
  73. }
  74. #endif
  75. }
  76. }
  77. void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  78. // get outputs
  79. auto kernel_outputs = launch_info_->outputs_;
  80. auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
  81. auto node_name = AnfAlgo::GetCNodeName(cnode);
  82. std::string kernel_name = GetKernelNodeName(cnode);
  83. std::vector<int> real_outputs = CheckRealOutput(node_name, output_size);
  84. for (int j : real_outputs) {
  85. auto addr = kernel_outputs[j];
  86. auto type = AnfAlgo::GetOutputInferDataType(cnode, (size_t)j);
  87. // For example, this happens with the Depend op
  88. if (type == kMetaTypeNone) {
  89. continue;
  90. }
  91. #ifdef ENABLE_GPU
  92. auto format = kOpFormat_DEFAULT;
  93. auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
  94. string tensor_name = kernel_name + ':' + std::to_string(j);
  95. ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
  96. auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false);
  97. if (!ret) {
  98. MS_LOG(ERROR) << "LoadMemToHost:"
  99. << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
  100. }
  101. #endif
  102. }
  103. }
  104. bool CheckReadData(const CNodePtr &cnode) {
  105. auto debugger = Debugger::GetInstance();
  106. if (!debugger) {
  107. return false;
  108. }
  109. bool read_data = false;
  110. auto &dump_json_parser = DumpJsonParser::GetInstance();
  111. bool dump_enabled = debugger->DumpDataEnabledIteration();
  112. std::string kernel_name = GetKernelNodeName(cnode);
  113. if (dump_enabled) {
  114. auto dump_mode = dump_json_parser.dump_mode();
  115. // dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list
  116. if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
  117. read_data = true;
  118. }
  119. } else if (debugger->debugger_enabled()) {
  120. read_data = debugger->ReadNodeDataRequired(cnode);
  121. }
  122. return read_data;
  123. }
  124. void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  125. auto debugger = Debugger::GetInstance();
  126. if (!debugger) {
  127. return;
  128. }
  129. auto &dump_json_parser = DumpJsonParser::GetInstance();
  130. bool dump_enabled = debugger->DumpDataEnabledIteration();
  131. if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
  132. LoadInputs(cnode, launch_info_, exec_order_);
  133. }
  134. if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
  135. LoadOutputs(cnode, launch_info_, exec_order_);
  136. }
  137. // Dump kernel
  138. if (dump_enabled) {
  139. auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
  140. MS_EXCEPTION_IF_NULL(kernel_graph);
  141. auto graph_id = kernel_graph->graph_id();
  142. debugger->DumpSingleNode(cnode, graph_id);
  143. // Clear Dumped data when online debugger is not enabled
  144. if (!debugger->debugger_enabled()) {
  145. debugger->ClearCurrentData();
  146. }
  147. }
  148. // check if the node is last kernel
  149. bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
  150. debugger->PostExecuteNode(cnode, last_kernel);
  151. }
  152. } // namespace mindspore