You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

e2e_dump.cc 44 kB

4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018
  1. /**
  2. * Copyright 2020-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/data_dump/e2e_dump.h"
  17. #include <unistd.h>
  18. #include <sstream>
  19. #include <algorithm>
  20. #include <map>
  21. #include <memory>
  22. #include <set>
  23. #include <utility>
  24. #include <vector>
  25. #include "debug/data_dump/dump_json_parser.h"
  26. #include "runtime/device/ms_device_shape_transfer.h"
  27. #include "debug/anf_ir_utils.h"
  28. #include "debug/common.h"
  29. #include "backend/common/session/anf_runtime_algorithm.h"
  30. #include "include/common/utils/anfalgo.h"
  31. #include "utils/ms_context.h"
  32. #include "runtime/device/kernel_runtime_manager.h"
  33. #include "include/common/utils/config_manager.h"
  34. #include "utils/file_utils.h"
  35. #include "debug/data_dump/tensor_stat_dump.h"
  36. #include "abstract/utils.h"
  37. #include "runtime/hardware/device_context_manager.h"
  38. #ifdef ENABLE_DEBUGGER
  39. #include "debug/debug_services.h"
  40. #include "debug/tensor_load.h"
  41. #include "debug/debugger/debugger.h"
  42. #endif
  43. namespace mindspore {
#ifdef ENABLE_D
// Aliases for the protobuf enums used when parsing Ascend dump output descriptions.
using ProtoFormat = debugger::dump::OutputFormat;
using ProtoDataType = debugger::dump::OutputDataType;
// Byte sizes of the fixed-size info/status regions in Ascend dump data.
// NOTE(review): exact layout semantics are defined by the Ascend dump format — confirm against its spec.
constexpr int kDhaAtomicAddInfoSize = 128;
constexpr int kL2AtomicAddInfoSize = 128;
constexpr int kAiCoreInfoSize = 256;
constexpr int kDhaAtomicAddStatusSize = 256;
constexpr int kL2AtomicAddStatusSize = 256;
constexpr int kUint64Size = sizeof(uint64_t);
// {device format, host format} pairs for which device-to-host layout transformation is supported.
const std::set<std::pair<std::string, std::string>> kSuppTransFormatPair = {
  // {device format, host format}
  {kOpFormat_FRAC_Z, kOpFormat_NCHW},      {kOpFormat_FRAC_NZ, kOpFormat_NCHW},
  {kOpFormat_NC1HWC0, kOpFormat_NCHW},     {kOpFormat_C1HWNCoC0, kOpFormat_NCHW},
  {kOpFormat_NC1HWC0_C04, kOpFormat_NCHW}, {kOpFormat_NDC1HWC0, kOpFormat_NCHW},
  {kOpFormat_FRACTAL_Z_3D, kOpFormat_NCHW}};
// Maps protobuf tensor formats to the corresponding MindSpore format strings.
const std::map<ProtoFormat, std::string> kFormatToStringMap = {
  {ProtoFormat::FORMAT_NCHW, kOpFormat_NCHW},
  {ProtoFormat::FORMAT_NHWC, kOpFormat_NHWC},
  {ProtoFormat::FORMAT_ND, kOpFormat_ND},
  {ProtoFormat::FORMAT_NC1HWC0, kOpFormat_NC1HWC0},
  {ProtoFormat::FORMAT_FRACTAL_Z, kOpFormat_FRAC_Z},
  {ProtoFormat::FORMAT_NC1HWC0_C04, kOpFormat_NC1HWC0_C04},
  {ProtoFormat::FORMAT_FRACTAL_Z_C04, kOpFormat_FRACTAL_Z_C04},
  {ProtoFormat::FORMAT_NC1KHKWHWC0, kOpFormat_NC1KHKWHWC0},
  {ProtoFormat::FORMAT_HWCN, kOpFormat_HWCN},
  {ProtoFormat::FORMAT_NDHWC, kOpFormat_NDHWC},
  {ProtoFormat::FORMAT_NCDHW, kOpFormat_NCDHW},
  {ProtoFormat::FORMAT_DHWCN, kOpFormat_DHWCN},
  {ProtoFormat::FORMAT_DHWNC, kOpFormat_DHWNC},
  {ProtoFormat::FORMAT_NDC1HWC0, kOpFormat_NDC1HWC0},
  {ProtoFormat::FORMAT_FRACTAL_Z_3D, kOpFormat_FRACTAL_Z_3D},
  {ProtoFormat::FORMAT_C1HWNCoC0, kOpFormat_C1HWNCoC0},
  {ProtoFormat::FORMAT_FRACTAL_NZ, kOpFormat_FRAC_NZ},
  {ProtoFormat::FORMAT_FRACTAL_ZN_LSTM, kOpFormat_FRACTAL_ZN_LSTM}};
// Maps protobuf data types to MindSpore TypeId values.
const std::map<ProtoDataType, mindspore::TypeId> kDataTypetoMSTypeMap = {
  {ProtoDataType::DT_UNDEFINED, mindspore::TypeId::kTypeUnknown},
  {ProtoDataType::DT_FLOAT, mindspore::TypeId::kNumberTypeFloat32},
  {ProtoDataType::DT_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
  {ProtoDataType::DT_INT8, mindspore::TypeId::kNumberTypeInt8},
  {ProtoDataType::DT_UINT8, mindspore::TypeId::kNumberTypeUInt8},
  {ProtoDataType::DT_INT16, mindspore::TypeId::kNumberTypeInt16},
  {ProtoDataType::DT_UINT16, mindspore::TypeId::kNumberTypeUInt16},
  {ProtoDataType::DT_INT32, mindspore::TypeId::kNumberTypeInt32},
  {ProtoDataType::DT_INT64, mindspore::TypeId::kNumberTypeInt64},
  {ProtoDataType::DT_UINT32, mindspore::TypeId::kNumberTypeUInt32},
  {ProtoDataType::DT_UINT64, mindspore::TypeId::kNumberTypeUInt64},
  {ProtoDataType::DT_BOOL, mindspore::TypeId::kNumberTypeBool},
  {ProtoDataType::DT_DOUBLE, mindspore::TypeId::kNumberTypeFloat64},
  {ProtoDataType::DT_STRING, mindspore::TypeId::kObjectTypeString}};
#endif
  94. std::string GenDataFilePath(const CNodePtr &node, const std::string &kernel_name, const std::string &dump_path,
  95. size_t slot, bool is_input) {
  96. std::string op_type = common::AnfAlgo::GetCNodeName(node);
  97. std::string op_name = GetOpNameWithoutScope(kernel_name);
  98. uint64_t timestamp = GetTimeStamp();
  99. uint32_t task_id = 0;
  100. uint32_t stream_id = 0;
  101. std::string tensor_type = is_input ? ".input." : ".output.";
  102. std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
  103. std::to_string(stream_id) + '.' + std::to_string(timestamp) + tensor_type +
  104. std::to_string(slot);
  105. return file_path;
  106. }
  107. bool E2eDump::IsDeviceTargetGPU() {
  108. auto context = MsContext::GetInstance();
  109. MS_EXCEPTION_IF_NULL(context);
  110. return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
  111. }
/*
 * Feature group: Dump.
 * Target device group: GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: This function is for dumping tensor in memory to disk in GPU machine.
 * Delegates the actual read-back and file write to Debugger::DumpTensorToFile; a failed
 * dump is logged at INFO level and otherwise ignored (best-effort dump). No-op when the
 * build does not define ENABLE_DEBUGGER.
 */
void E2eDump::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
                               const device::DeviceAddress &addr, const ShapeVector &int_shapes,
                               const TypeId &host_type, const TypeId &device_type, bool trans_flag, size_t slot,
                               const Debugger *debugger) {
#ifdef ENABLE_DEBUGGER
  // GPU dump always uses the default host format; addr.format() carries the device-side format.
  auto format = kOpFormat_DEFAULT;
  MS_EXCEPTION_IF_NULL(debugger);
  auto ret = debugger->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, host_type,
                                        device_type, addr.format(), slot);
  if (!ret) {
    MS_LOG(INFO) << "DumpTensorToFile Failed: flag:" << trans_flag << ", path:" << file_path
                 << ", host_format:" << format;
  }
#endif
}
  133. void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  134. MS_EXCEPTION_IF_NULL(graph);
  135. auto &dump_json_parser = DumpJsonParser::GetInstance();
  136. if (!dump_json_parser.OutputNeedDump()) {
  137. return;
  138. }
  139. MS_LOG(INFO) << "Start e2e dump output";
  140. bool trans_flag = dump_json_parser.trans_flag();
  141. const auto &apply_kernels = graph->execution_order();
  142. for (const auto &node : apply_kernels) {
  143. MS_EXCEPTION_IF_NULL(node);
  144. std::string kernel_name = GetKernelNodeName(node);
  145. if (!dump_json_parser.NeedDump(kernel_name)) {
  146. continue;
  147. }
  148. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  149. DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  150. }
  151. }
  152. void E2eDump::DumpOutputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  153. auto &dump_json_parser = DumpJsonParser::GetInstance();
  154. if (!dump_json_parser.OutputNeedDump()) {
  155. return;
  156. }
  157. bool trans_flag = dump_json_parser.trans_flag();
  158. MS_EXCEPTION_IF_NULL(node);
  159. std::string kernel_name = GetKernelNodeName(node);
  160. if (!dump_json_parser.NeedDump(kernel_name)) {
  161. return;
  162. }
  163. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  164. DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  165. }
/*
 * Feature group: Dump.
 * Description: Dumps every existing output of `node` to `dump_path`. For each output
 * slot it may write a statistics CSV entry (GPU or Ascend kernel-by-kernel mode) and/or
 * the tensor data itself, depending on the dump configuration.
 * Note: `kernel_name` is rewritten in place by GetFileKernelName to a file-safe name.
 */
void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto output_size = common::AnfAlgo::GetOutputTensorNum(node);
  for (size_t j = 0; j < output_size; ++j) {
    // Skip outputs that have no device address (nothing was allocated to dump).
    if (!AnfAlgo::OutputAddrExist(node, j)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(node, j);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
    auto device_type = AnfAlgo::GetOutputDeviceDataType(node, j);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = GetOpNameWithoutScope(*kernel_name);
    // Task/stream ids are not tracked on this path; fixed to 0 in the file name.
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    uint64_t timestamp = GetTimeStamp();
    // File name layout: <op_type>.<op_name>.<task>.<stream>.<timestamp>.output.<slot>
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
                            std::to_string(j);
    // Statistics dump applies only on GPU and Ascend kernel-by-kernel mode.
    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, false, j, j);
      (void)stat_dump.DumpTensorStatsToFile(GetKernelNodeName(node), dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsDeviceTargetGPU()) {
        // GPU reads data back through the debugger's tensor cache.
        DumpGPUMemToFile(file_path, GetKernelNodeName(node), *addr, int_shapes, type, device_type, trans_flag, j,
                         debugger);
      } else {
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}
  204. void E2eDump::DumpOutputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
  205. std::string *kernel_name) {
  206. auto debugger = Debugger::GetInstance();
  207. MS_EXCEPTION_IF_NULL(debugger);
  208. if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
  209. MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
  210. return;
  211. }
  212. MS_EXCEPTION_IF_NULL(node);
  213. GetFileKernelName(NOT_NULL(kernel_name));
  214. auto output_size = common::AnfAlgo::GetOutputTensorNum(node);
  215. for (size_t j = 0; j < output_size; ++j) {
  216. if (!AnfAlgo::OutputAddrExist(node, j)) {
  217. continue;
  218. }
  219. auto addr = AnfAlgo::GetOutputAddr(node, j);
  220. MS_EXCEPTION_IF_NULL(addr);
  221. ShapeVector int_shapes;
  222. GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
  223. auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
  224. std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, false);
  225. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  226. }
  227. }
  228. void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  229. MS_EXCEPTION_IF_NULL(graph);
  230. auto &dump_json_parser = DumpJsonParser::GetInstance();
  231. if (!dump_json_parser.InputNeedDump()) {
  232. return;
  233. }
  234. MS_LOG(INFO) << "Start e2e dump input";
  235. bool trans_flag = dump_json_parser.trans_flag();
  236. const auto &apply_kernels = graph->execution_order();
  237. for (const auto &node : apply_kernels) {
  238. MS_EXCEPTION_IF_NULL(node);
  239. std::string kernel_name = GetKernelNodeName(node);
  240. if (!dump_json_parser.NeedDump(kernel_name)) {
  241. continue;
  242. }
  243. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  244. DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  245. }
  246. }
/*
 * Feature group: Dump.
 * Description: Dumps the inputs of a single kernel node, applying the same
 * configuration filters as the whole-graph path. `launch_info` is forwarded to
 * DumpInputImpl for Ascend kernel-by-kernel mode, where input addresses come from
 * the kernel launch info rather than the graph.
 */
void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger,
                                  const KernelLaunchInfo *launch_info) {
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.InputNeedDump()) {
    return;
  }
  bool trans_flag = dump_json_parser.trans_flag();
  MS_EXCEPTION_IF_NULL(node);
  std::string kernel_name = GetKernelNodeName(node);
  if (!dump_json_parser.NeedDump(kernel_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger, launch_info);
}
/*
 * Wraps the `index`-th kernel-launch input buffer in a DeviceAddress so the generic
 * dump helpers can read it. Used only in Ascend kernel-by-kernel mode.
 * NOTE(review): `index` is not bounds-checked against launch_info->inputs_ — callers
 * must pass a valid input index; confirm whether a check is needed.
 */
std::shared_ptr<device::DeviceAddress> CreateAscendDeviceAddress(const KernelLaunchInfo *launch_info, size_t index,
                                                                 TypeId type) {
  MS_EXCEPTION_IF_NULL(launch_info);
  auto addr_ptr = launch_info->inputs_[index];
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
  auto device_context =
    device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({kAscendDevice, device_id});
  // Default format and an empty shape: only the raw bytes are needed for dumping.
  auto format = kOpFormat_DEFAULT;
  MS_EXCEPTION_IF_NULL(addr_ptr);
  return device_context->CreateDeviceAddress(addr_ptr->addr, addr_ptr->size, format, type, ShapeVector());
}
/*
 * Feature group: Dump.
 * Description: Dumps every input of `node` to `dump_path`. Each input is resolved to
 * the producing node's output (prev-node output); inputs without a device address are
 * skipped. Depending on configuration and platform, writes a statistics CSV entry
 * and/or the tensor data. In Ascend kernel-by-kernel mode the data is read through
 * the address in `launch_info` rather than the graph's output address.
 * Note: `kernel_name` is rewritten in place by GetFileKernelName to a file-safe name.
 */
void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name, const Debugger *debugger, const KernelLaunchInfo *launch_info) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    // Resolve input j to the (producer node, output slot) that feeds it.
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    std::string tensor_name = GetKernelNodeName(node);
    size_t slot = j;
    // On GPU and Ascend kernel-by-kernel mode, tensors are cached under the producer
    // kernel's name with slot 0, so look them up by the input kernel instead.
    if (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
      auto input_kernel = node->input(j + 1);
      std::string input_kernel_name = GetKernelNodeName(input_kernel);
      tensor_name = input_kernel_name;
      slot = 0;
    }
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    auto device_type = AnfAlgo::GetOutputDeviceDataType(input, index);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = GetOpNameWithoutScope(*kernel_name);
    uint64_t timestamp = GetTimeStamp();
    // Task/stream ids are not tracked on this path; fixed to 0 in the file name.
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    // File name layout: <op_type>.<op_name>.<task>.<stream>.<timestamp>.input.<slot>
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, true, j, slot);
      (void)stat_dump.DumpTensorStatsToFile(tensor_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsDeviceTargetGPU()) {
        DumpGPUMemToFile(file_path, tensor_name, *addr, int_shapes, type, device_type, trans_flag, slot, debugger);
      } else if (Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
        // load address from launch_info when it's Ascend Kernel by kernel mode.
        auto ascend_device_addr = CreateAscendDeviceAddress(launch_info, j, type);
        DumpMemToFile(file_path, *ascend_device_addr, int_shapes, type, trans_flag);
      } else {
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}
/*
 * Feature group: Dump.
 * Target device group: Ascend.
 * Runtime category: Old runtime (graph/super-kernel mode).
 * Description: Dumps every input of `node` to disk. GPU and Ascend kernel-by-kernel
 * modes are served by DumpInputImpl instead, so this path bails out early for them.
 * Note: `kernel_name` is rewritten in place by GetFileKernelName to a file-safe name.
 */
void E2eDump::DumpInputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name) {
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
    MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
    return;
  }
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    // Resolve input j to the (producer node, output slot) that feeds it.
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, true);
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}
/*
 * Feature group: Dump.
 * Description: Dumps one Parameter or ValueNode (constant) output to `dump_path`.
 * Any other node kind, and string constants, are ignored. Constant names are expected
 * to start with "Default--"; that prefix is stripped for the file name and layout
 * transformation is disabled for constants.
 */
void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                bool trans_flag, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  // Only parameters and non-string value nodes are dumped here.
  if ((!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) || IsValueNode<StringImm>(anf_node)) {
    return;
  }
  std::string node_name = GetKernelNodeName(anf_node);
  if (!dump_json_parser.NeedDump(node_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  std::string dump_name = node_name;
  const std::string cst_prefix = "Default--";
  if (anf_node->isa<ValueNode>()) {
    if (dump_name.find(cst_prefix) == std::string::npos) {
      MS_LOG(INFO) << "Incorrect constant format: " << dump_name;
      return;
    }
    // Strip the scope prefix; constants are dumped in device format (no transform).
    dump_name = node_name.substr(cst_prefix.length());
    trans_flag = false;
  }
  // check if output address exists, if not, return;
  if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
    return;
  }
  auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
  MS_EXCEPTION_IF_NULL(addr);
  ShapeVector int_shapes;
  GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
  auto type = common::AnfAlgo::GetOutputInferDataType(anf_node, output_index);
  auto device_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
  uint64_t timestamp = GetTimeStamp();
  // Task/stream ids are not tracked for parameters/constants; fixed to 0.
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // File name layout: Parameter.<name>.<task>.<stream>.<timestamp>.output.0
  std::string file_path = dump_path + "/Parameter." + dump_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpGPUMemToFile(file_path, node_name, *addr, int_shapes, type, device_type, trans_flag, 0, debugger);
    }
  } else {
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is similar to DumpSingleAnfNode function but it is only for dumping parameters in mindRT.
 * This function uses GetParameterInfo to get dump info for the parameter node.
 */
void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag,
                                      const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  std::string node_name = GetKernelNodeName(anf_node);
  // Only dump parameters that match the configuration and only when outputs are dumped.
  if (!anf_node->isa<Parameter>() || !dump_json_parser.NeedDump(node_name) || !dump_json_parser.OutputNeedDump()) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  ShapeVector int_shapes;
  TypeId type;
  TypeId device_type;
  auto addr = GetParameterInfo(anf_node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type));
  if (addr == nullptr) {
    // Parameter may not be materialized yet under mindRT; skip silently.
    MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for mindRT.";
    return;
  }
  uint64_t timestamp = GetTimeStamp();
  // Task/stream ids are not tracked for parameters; fixed to 0 in the file name.
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // File name layout: Parameter.<name>.<task>.<stream>.<timestamp>.output.0
  std::string file_path = dump_path + "/Parameter." + node_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpGPUMemToFile(file_path, node_name, *addr, int_shapes, type, device_type, trans_flag, 0, debugger);
    }
  } else {
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}
  445. void E2eDump::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path,
  446. const Debugger *debugger) {
  447. MS_EXCEPTION_IF_NULL(graph);
  448. auto &dump_json_parser = DumpJsonParser::GetInstance();
  449. if (!dump_json_parser.OutputNeedDump()) {
  450. return;
  451. }
  452. MS_LOG(INFO) << "Start e2e dump parameters";
  453. bool trans_flag = dump_json_parser.trans_flag();
  454. // dump parameters
  455. const auto &parameters = graph->inputs();
  456. for (auto &item : parameters) {
  457. DumpSingleAnfNode(item, PARAMETER_OUTPUT_INDEX, dump_path, trans_flag, debugger);
  458. }
  459. }
/*
 * Feature group: Dump.
 * Target device group: GPU.
 * Description: Dumps the graph's constants once per (graph, rank). Builds the
 * constant dump directory path and delegates to the path-based overload only if
 * that directory does not exist yet (constants are identical across iterations).
 */
void E2eDump::DumpConstantData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  // Constant dump via this entry point is GPU-only and requires e2e dump.
  if (!IsDeviceTargetGPU() || !dump_json_parser.e2e_dump_enabled()) {
    return;
  }
  uint32_t graph_id = graph->graph_id();
  std::string cst_path = GenerateDumpPath(graph_id, rank_id, true);
  if (!Common::FileExists(cst_path)) {
    DumpConstantData(graph, cst_path, debugger);
  }
}
/*
 * Feature group: Dump.
 * Description: Dumps every value node (constant) of the graph into `cst_dump_path`
 * as npy files. Layout transformation is disabled for constants (trans_flag=false).
 */
void E2eDump::DumpConstantData(const session::KernelGraph *graph, const std::string &cst_dump_path,
                               const Debugger *debugger) {
  // Dump constant to npy file
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  MS_LOG(INFO) << "DumpConstants. Current iteration is " << dump_json_parser.cur_dump_iter();
  MS_LOG(INFO) << "Current graph id is " << graph->graph_id();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  const auto value_nodes = graph->graph_value_nodes();
  for (auto &item : value_nodes) {
    DumpSingleAnfNode(item, VALUE_NODE_OUTPUT_INDEX, cst_dump_path, false, debugger);
  }
}
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime.
 * Description: This function is for updating dump iteration for GPU and ascend old runtime.
 */
void E2eDump::UpdateIterOldRTDump(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  uint32_t graph_id = graph->graph_id();
  if (IsDeviceTargetGPU()) {
    // On GPU, the iteration counter is bumped only when the first-seen graph comes
    // around again (one full pass over all graphs == one iteration).
    // NOTE(review): INT32_MAX is used as the "unset" sentinel for starting_graph_id.
    if (starting_graph_id == INT32_MAX) {
      starting_graph_id = graph_id;
    } else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      // Update dump iter for mindrt runtime is done using UpdateIterGPUDump().
      // Update dump iter for GPU old runtime.
      dump_json_parser.UpdateDumpIter();
    }
    return;
  }
  // If device target is Ascend
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "No need to update iteration for dataset graph.";
    return;
  }
  // In multi network scripts, dump iter is equal to the number of networks that have been executed so far.
  dump_json_parser.UpdateDumpIter();
}
  515. /*
  516. * Feature group: Dump.
  517. * Target device group: Ascend, GPU.
  518. * Runtime category: MindRT.
  519. * Description: This function is for updating dump iteration for GPU and ascend MindRT dump. Please note that dump with
  520. * dataset_sink_mode = True is not supported for GPU.
  521. */
  522. void E2eDump::UpdateIterMindRTDump() {
  523. auto debugger = Debugger::GetInstance();
  524. // Dataset graph is always the first graph in the list when dataset_sink_mode is true.
  525. auto graph = (debugger->GetStepGraphPtrList())[0];
  526. auto context = MsContext::GetInstance();
  527. MS_EXCEPTION_IF_NULL(context);
  528. if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice && graph->IsDatasetGraph()) {
  529. MS_LOG(INFO) << "No need to update iteration for dataset graph.";
  530. return;
  531. }
  532. // update dump iter for GPU and kernel by kernel ascend dump.
  533. DumpJsonParser::GetInstance().UpdateDumpIter();
  534. }
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Generates graph history files (dumping all the iteration numbers in which the graph was executed) for
 * the given graph and rank_id. If dataset_sink_mode is true for async dump in ascend, this function is called once per
 * each epoch and dumps all the iterations in the epoch to the graph history file.
 */
void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
  auto &json_parser = DumpJsonParser::GetInstance();
  if (!(json_parser.async_dump_enabled() || json_parser.e2e_dump_enabled())) {
    return;
  }
  bool sink_mode = (ConfigManager::GetInstance().dataset_mode() || graph->IsDatasetGraph());
  auto iter_num = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
  // Dataset graphs never produce a history file.
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "graph: " << graph->graph_id() << " is dataset graph, not creating graph history file.";
    return;
  }
  if (!IsDeviceTargetGPU() && (graph->graph_id() != graph->root_graph_id())) {
    // when device target is ascend, we only dump graph run iter for the root graph.
    return;
  }
  // History file: <path>/rank_<id>/execution_order/ms_global_execution_order_graph_<gid>.csv
  std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/";
  std::string graph_str =
    IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
  std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv";
  auto real_path = Common::CreatePrefixPath(file_name_to_check);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Check file path: " << file_name_to_check << " failed.";
    return;
  }
  std::string file_name = real_path.value();
  // Temporarily make the file writable, append, then restore it to read-only.
  ChangeFileMode(file_name, S_IWUSR);
  std::ofstream fout(file_name, std::ofstream::app);
  if (!fout.is_open()) {
    MS_LOG(WARNING) << "Open file for saving graph global execution order failed.";
    return;
  }
  if (sink_mode && json_parser.async_dump_enabled() && !Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
    // for async dump when sink_mode = true, cur_dump_iter() = current_epoch
    // dump history for all iterations in the epoch
    Debugger::GetInstance()->UpdateGraphIterMap(graph->graph_id(), iter_num);
    auto graph_iter_map = Debugger::GetInstance()->GetGraphIterMap();
    auto step_per_epoch = IntToSize(graph_iter_map[graph->graph_id()]);
    for (size_t i = 0; i < step_per_epoch; i++) {
      auto step = (json_parser.cur_dump_iter() * step_per_epoch) + i;
      fout << (std::to_string(step) + "\n");
    }
  } else {
    // Non-sink (or e2e) mode: one line per call, the current dump iteration.
    fout << std::to_string(json_parser.cur_dump_iter()) + "\n";
  }
  fout.close();
  ChangeFileMode(file_name, S_IRUSR);
}
  590. /*
  591. * Feature group: Dump.
  592. * Target device group: Ascend, GPU.
  593. * Runtime category: Old runtime, MindRT.
  594. * Description: This function is for dumping the whole graph. It is used for old runtime in GPU and Ascend and
  595. * super-kernel mindRT in Ascend.
  596. */
  597. void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  598. MS_EXCEPTION_IF_NULL(graph);
  599. bool success = false;
  600. auto &dump_json_parser = DumpJsonParser::GetInstance();
  601. uint32_t graph_id = graph->graph_id();
  602. if (!dump_json_parser.e2e_dump_enabled()) {
  603. return;
  604. }
  605. if (dump_json_parser.GetIterDumpFlag()) {
  606. MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
  607. MS_LOG(INFO) << "Current graph id is " << graph_id;
  608. std::string dump_path = GenerateDumpPath(graph_id, rank_id);
  609. if (dump_json_parser.IsStatisticDump()) {
  610. (void)TensorStatDump::OpenStatisticsFile(dump_path);
  611. }
  612. DumpInput(graph, dump_path, debugger);
  613. DumpOutput(graph, dump_path, debugger);
  614. if (!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
  615. // Dump parameters for old runtime. For mindRT it is done in PostExecuteGraphDebugger.
  616. DumpParameters(graph, dump_path, debugger);
  617. // DumpConstantData for GPU old runtime.
  618. DumpConstantData(graph, rank_id, debugger);
  619. }
  620. if (dump_json_parser.IsStatisticDump()) {
  621. CsvWriter::GetInstance().CloseFile();
  622. }
  623. success = true;
  624. }
  625. if (success) {
  626. MS_LOG(DEBUG) << "E2eDump Dump Data completed!";
  627. } else {
  628. MS_LOG(DEBUG) << "E2eDump Dump has not occurred!";
  629. }
  630. }
  631. /*
  632. * Feature group: Dump.
  633. * Target device group: Ascend, GPU.
  634. * Runtime category: MindRT.
  635. * Description: This function is for dumping a single node. It is used for mindrt in GPU and Ascend kernel-by-kernel.
  636. */
  637. bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger,
  638. const KernelLaunchInfo *launch_info) {
  639. bool success = false;
  640. auto &dump_json_parser = DumpJsonParser::GetInstance();
  641. if (dump_json_parser.DumpEnabledForIter()) {
  642. std::string dump_path = GenerateDumpPath(graph_id, rank_id);
  643. DumpInputSingleNode(node, dump_path, debugger, launch_info);
  644. DumpOutputSingleNode(node, dump_path, debugger);
  645. success = true;
  646. }
  647. return success;
  648. }
  649. /*
  650. * Feature group: Dump.
  651. * Target device group: Ascend, GPU.
  652. * Runtime category: MindRT.
  653. * Description: This function is for dumping all the parameters in the current root graph for GPU, Ascend superkernel
  654. * (e2e dump) and Ascend kernel-by-kernel (e2e and async dump).
  655. */
  656. void E2eDump::DumpParametersData(uint32_t rank_id, const Debugger *debugger) {
  657. uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
  658. auto &dump_json_parser = DumpJsonParser::GetInstance();
  659. if (dump_json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) {
  660. // Dump parameters for mindRT in async dump only for kernel by kernel mode.
  661. return;
  662. }
  663. if (dump_json_parser.DumpEnabledForIter()) {
  664. MS_LOG(INFO) << "DumpParameters. Current iteration is " << dump_json_parser.cur_dump_iter();
  665. MS_LOG(INFO) << "Current root graph id is " << root_graph_id;
  666. std::string dump_path = GenerateDumpPath(root_graph_id, rank_id);
  667. bool trans_flag = dump_json_parser.trans_flag();
  668. for (auto &item : debugger->GetParametersMindRT()) {
  669. DumpSingleParameterNode(item, dump_path, trans_flag, debugger);
  670. }
  671. }
  672. }
  673. #ifdef ENABLE_D
  674. template <typename T>
  675. dump_data_t ParseAttrsFromDumpData(const std::string &dump_path, char *data_ptr, const T &tensor, const std::string &io,
  676. uint32_t slot) {
  677. // get data type
  678. auto iter_dtype = kDataTypetoMSTypeMap.find(tensor.data_type());
  679. if (iter_dtype == kDataTypetoMSTypeMap.end()) {
  680. MS_LOG(INFO) << "Unsupported data type for tensor " << dump_path << ": unknown(" << tensor.data_type() << ")";
  681. return dump_data_t{};
  682. }
  683. auto data_type = iter_dtype->second;
  684. // get format
  685. auto iter_fmt = kFormatToStringMap.find(tensor.format());
  686. if (iter_fmt == kFormatToStringMap.end()) {
  687. MS_LOG(INFO) << "Unsupported tensor format for tensor " << dump_path << ": unknown(" << tensor.format() << ")";
  688. return dump_data_t{};
  689. }
  690. std::string device_format = iter_fmt->second;
  691. // get shape
  692. ShapeVector shape_d;
  693. (void)std::transform(tensor.shape().dim().begin(), tensor.shape().dim().end(), std::back_inserter(shape_d),
  694. SizeToLong);
  695. ShapeVector shape_to;
  696. (void)std::transform(tensor.original_shape().dim().begin(), tensor.original_shape().dim().end(),
  697. std::back_inserter(shape_to), SizeToLong);
  698. // get size and sub_format
  699. size_t data_size = (size_t)tensor.size();
  700. int32_t sub_format = tensor.sub_format();
  701. return dump_data_t{dump_path, data_ptr, data_type, device_format, shape_d, shape_to, data_size, sub_format, io, slot};
  702. }
  703. /*
  704. * Feature group: Dump.
  705. * Target device group: Ascend.
  706. * Runtime category: Old runtime, MindRT.
  707. * Description: This function is for ascend A+M dump only. It parses and converts each slot of tensor in DumpData object
  708. * and dump the tensor data in npy file or statistic data in csv file.
  709. */
  710. void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
  711. char *data_ptr) {
  712. std::vector<dump_data_t> dump_tensor_vec;
  713. // dump input tensors
  714. std::vector<debugger::dump::OpInput> input_tensors(dump_data.input().begin(), dump_data.input().end());
  715. uint64_t offset = 0;
  716. for (uint32_t slot = 0; slot < input_tensors.size(); slot++) {
  717. auto in_tensor = input_tensors[slot];
  718. dump_tensor_vec.push_back(ParseAttrsFromDumpData(dump_path, data_ptr + offset, in_tensor, "input", slot));
  719. offset += in_tensor.size();
  720. }
  721. // dump output tensors
  722. std::vector<debugger::dump::OpOutput> output_tensors(dump_data.output().begin(), dump_data.output().end());
  723. for (uint32_t slot = 0; slot < output_tensors.size(); slot++) {
  724. auto out_tensor = output_tensors[slot];
  725. dump_tensor_vec.push_back(ParseAttrsFromDumpData(dump_path, data_ptr + offset, out_tensor, "output", slot));
  726. offset += out_tensor.size();
  727. }
  728. // assign slot conversion task to different thread.
  729. if (dump_tensor_vec.empty()) {
  730. return;
  731. }
  732. constexpr int kMaxTensorSize = 1048576;
  733. if (offset <= kMaxTensorSize) {
  734. // If the total tensor size is less than 1Mb, do it in single thread.
  735. ConvertFormatForTensors(&dump_tensor_vec, 0, dump_tensor_vec.size() - 1);
  736. } else {
  737. auto default_num_workers = std::max<uint32_t>(1, std::thread::hardware_concurrency() / 4);
  738. auto num_threads = std::min<uint32_t>(default_num_workers, dump_tensor_vec.size());
  739. uint32_t task_size = dump_tensor_vec.size() / num_threads;
  740. uint32_t remainder = dump_tensor_vec.size() % num_threads;
  741. std::vector<std::thread> threads;
  742. threads.reserve(num_threads);
  743. MS_LOG(INFO) << "Number of threads used for A+M dump: " << num_threads;
  744. for (size_t t = 0; t < threads.capacity(); t++) {
  745. uint32_t start_idx = t * task_size;
  746. uint32_t end_idx = start_idx + task_size - 1;
  747. if (t == num_threads - 1) {
  748. end_idx += remainder;
  749. }
  750. threads.emplace_back(std::thread(&E2eDump::ConvertFormatForTensors, &dump_tensor_vec, start_idx, end_idx));
  751. }
  752. for (auto &thd : threads) {
  753. if (thd.joinable()) {
  754. thd.join();
  755. }
  756. }
  757. }
  758. for (auto &dump_tensor_item : dump_tensor_vec) {
  759. (void)DumpTensorStatsIfNeeded(dump_tensor_item);
  760. }
  761. }
  762. void E2eDump::ConvertFormatForTensors(std::vector<dump_data_t> *dump_tensor_vec, uint32_t start_idx, uint32_t end_idx) {
  763. for (uint32_t idx = start_idx; idx <= end_idx; idx++) {
  764. auto &dump_data_obj = dump_tensor_vec->at(idx);
  765. auto succ = ConvertFormatForOneTensor(&dump_data_obj);
  766. if (!succ) {
  767. MS_LOG(INFO) << "Failed to convert format for tensor " << dump_data_obj.dump_file_path << "."
  768. << dump_data_obj.in_out_str << "." << dump_data_obj.slot;
  769. }
  770. (void)DumpTensorDataIfNeeded(dump_data_obj);
  771. }
  772. }
  773. /*
  774. * Feature group: Dump.
  775. * Target device group: Ascend.
  776. * Runtime category: Old runtime, MindRT.
  777. * Description: It serves for A+M dump. Save tensor into dump path as configured.
  778. */
  779. bool E2eDump::DumpTensorDataIfNeeded(const dump_data_t &dump_tensor_info) {
  780. if (!DumpJsonParser::GetInstance().IsTensorDump()) {
  781. return true;
  782. }
  783. // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp
  784. std::ostringstream dump_path_ss;
  785. dump_path_ss << dump_tensor_info.dump_file_path << "." << dump_tensor_info.in_out_str << "." << dump_tensor_info.slot
  786. << "." << dump_tensor_info.format;
  787. std::string dump_path_slot = dump_path_ss.str();
  788. std::shared_ptr<tensor::Tensor> trans_buf = dump_tensor_info.trans_buf;
  789. bool dump_succ = false;
  790. if (trans_buf) {
  791. dump_succ = DumpJsonParser::DumpToFile(dump_path_slot, trans_buf->data_c(), trans_buf->Size(),
  792. dump_tensor_info.host_shape, dump_tensor_info.data_type);
  793. } else {
  794. dump_succ = DumpJsonParser::DumpToFile(dump_path_slot, dump_tensor_info.data_ptr, dump_tensor_info.data_size,
  795. dump_tensor_info.host_shape, dump_tensor_info.data_type);
  796. }
  797. return dump_succ;
  798. }
  799. /*
  800. * Feature group: Dump.
  801. * Target device group: Ascend.
  802. * Runtime category: Old runtime, MindRT.
  803. * Description: It serves for A+M dump. Save statistic of the tensor data into dump path as configured.
  804. */
bool E2eDump::DumpTensorStatsIfNeeded(const dump_data_t &dump_tensor_info) {
  // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp
  // No-op (treated as success) unless statistic dump is configured.
  if (!DumpJsonParser::GetInstance().IsStatisticDump()) {
    return true;
  }
  std::string dump_path = dump_tensor_info.dump_file_path;
  size_t pos = dump_path.rfind("/");
  std::string file_name = dump_path.substr(pos + 1);
  // Locate the four separating dots: the first from the front, the last three
  // from the back (order matters: each rfind is bounded by the dot found after
  // it). With fewer than four dots one of the searches falls off the front and
  // yields npos, or first_dot collides with second_dot — both caught below.
  size_t first_dot = file_name.find(".");
  size_t fourth_dot = file_name.rfind(".");
  size_t third_dot = file_name.rfind(".", fourth_dot - 1);
  size_t second_dot = file_name.rfind(".", third_dot - 1);
  if (first_dot == std::string::npos || second_dot == std::string::npos || third_dot == std::string::npos ||
      first_dot == second_dot) {
    MS_LOG(ERROR) << "Dump path " << dump_path << " received is not well formed";
    return false;
  }
  // Slice the five dot-separated fields out of the file name.
  std::string op_type = file_name.substr(0, first_dot);
  std::string op_name = file_name.substr(first_dot + 1, second_dot - first_dot - 1);
  std::string task_id = file_name.substr(second_dot + 1, third_dot - second_dot - 1);
  std::string stream_id = file_name.substr(third_dot + 1, fourth_dot - third_dot - 1);
  std::string timestamp = file_name.substr(fourth_dot + 1);
  // NOTE(review): dump_tensor_info.slot is passed for the last two arguments —
  // verify against TensorStatDump's constructor whether the final one should
  // be a different value (e.g. a root-graph or task identifier).
  TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, dump_tensor_info.in_out_str,
                           dump_tensor_info.slot, dump_tensor_info.slot);
  std::shared_ptr<TensorData> data = std::make_shared<TensorData>();
  // Statistics only support plain numeric types (reject kNumberTypeBegin and
  // below, complex and above).
  if (dump_tensor_info.data_type <= TypeId::kNumberTypeBegin ||
      dump_tensor_info.data_type >= TypeId::kNumberTypeComplex64) {
    MS_LOG(ERROR) << "Data type of operator " << file_name << " is not supported by statistic dump";
    return false;
  }
  // Use the host-converted buffer when a format conversion produced one,
  // otherwise the raw device bytes.
  std::shared_ptr<tensor::Tensor> trans_buf = dump_tensor_info.trans_buf;
  if (trans_buf) {
    data->SetByteSize(trans_buf->Size());
    data->SetDataPtr(static_cast<char *>(trans_buf->data_c()));
  } else {
    data->SetByteSize(dump_tensor_info.data_size);
    data->SetDataPtr(dump_tensor_info.data_ptr);
  }
  data->SetType(dump_tensor_info.data_type);
  data->SetShape(dump_tensor_info.host_shape);
  // Write the statistics row into the csv under the containing directory.
  return stat_dump.DumpTensorStatsToFile(dump_path.substr(0, pos), data);
}
  847. /*
  848. * Feature group: Dump.
  849. * Target device group: Ascend.
  850. * Runtime category: Old runtime, MindRT.
  851. * Description: It serves for A+M dump. Convert tensor from device format to host format if needed.
  852. */
  853. bool E2eDump::ConvertFormatForOneTensor(dump_data_t *dump_tensor_info) {
  854. bool trans_success = false;
  855. auto trans_buf = std::make_shared<tensor::Tensor>(dump_tensor_info->data_type, dump_tensor_info->host_shape);
  856. // convert format to host format. It can be either NCHW or ND (non 4-dimemsions).
  857. const uint8_t kNumFourDim = 4;
  858. std::string host_format;
  859. std::string device_format = dump_tensor_info->format;
  860. if (dump_tensor_info->host_shape.size() == kNumFourDim) {
  861. host_format = kOpFormat_NCHW;
  862. } else {
  863. host_format = kOpFormat_ND;
  864. }
  865. if (device_format != host_format) {
  866. auto iter = kSuppTransFormatPair.find(std::make_pair(device_format, host_format));
  867. if (iter == kSuppTransFormatPair.end()) {
  868. MS_LOG(INFO) << "Do not support convert from format " << device_format << " to " << host_format << " for tensor "
  869. << dump_tensor_info->dump_file_path << "." << dump_tensor_info->in_out_str << "."
  870. << dump_tensor_info->slot;
  871. } else {
  872. const trans::FormatArgs format_args{dump_tensor_info->data_ptr,
  873. dump_tensor_info->data_size,
  874. host_format,
  875. device_format,
  876. dump_tensor_info->host_shape,
  877. dump_tensor_info->device_shape,
  878. dump_tensor_info->data_type};
  879. auto group = dump_tensor_info->sub_format > 1 ? dump_tensor_info->sub_format : 1;
  880. trans_success = trans::TransFormatFromDeviceToHost(format_args, trans_buf->data_c(), group);
  881. if (!trans_success) {
  882. MS_LOG(ERROR) << "Trans format failed.";
  883. }
  884. }
  885. }
  886. if (trans_success) {
  887. dump_tensor_info->format = host_format;
  888. dump_tensor_info->trans_buf = trans_buf;
  889. }
  890. return trans_success;
  891. }
  892. uint64_t UnpackUint64Value(char *ptr) {
  893. #if defined(__APPLE__)
  894. return *reinterpret_cast<const uint64_t *>(ptr);
  895. #else
  896. return le64toh(*reinterpret_cast<const uint64_t *>(ptr));
  897. #endif
  898. }
  899. std::string IntToHexString(const uint64_t value) {
  900. std::stringstream ss;
  901. ss << "0x" << std::hex << value;
  902. return ss.str();
  903. }
  904. nlohmann::json E2eDump::ParseOverflowInfo(char *data_ptr) {
  905. uint32_t index = 0;
  906. uint64_t model_id = UnpackUint64Value(data_ptr);
  907. index += kUint64Size;
  908. uint64_t stream_id = UnpackUint64Value(data_ptr + index);
  909. index += kUint64Size;
  910. uint64_t task_id = UnpackUint64Value(data_ptr + index);
  911. index += kUint64Size;
  912. uint64_t task_type = UnpackUint64Value(data_ptr + index);
  913. index += kUint64Size;
  914. uint64_t pc_start = UnpackUint64Value(data_ptr + index);
  915. index += kUint64Size;
  916. uint64_t para_base = UnpackUint64Value(data_ptr + index);
  917. nlohmann::json overflow_info;
  918. overflow_info["model_id"] = model_id;
  919. overflow_info["stream_id"] = stream_id;
  920. overflow_info["task_id"] = task_id;
  921. overflow_info["task_type"] = task_type;
  922. overflow_info["pc_start"] = IntToHexString(pc_start);
  923. overflow_info["para_base"] = IntToHexString(para_base);
  924. return overflow_info;
  925. }
  926. /*
  927. * Feature group: Dump.
  928. * Target device group: Ascend.
  929. * Runtime category: Old runtime, MindRT.
  930. * Description: This function is for Ascend A+M dump. It parses and dump op overflow info in json file.
  931. */
void E2eDump::DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
                                char *data_ptr) {
  // Parses the op-debug (overflow) record in data_ptr and writes one JSON
  // file per output slot of dump_data: three ParseOverflowInfo blocks (DHA
  // Atomic Add, L2 Atomic Add, AICore) followed by their status words.
  std::string out_path = dump_path + ".output.";
  std::vector<debugger::dump::OpOutput> op_debug(dump_data.output().begin(), dump_data.output().end());
  // NOTE(review): data_ptr is not advanced between iterations, so every slot
  // parses the same record and only the output file name differs — confirm
  // whether op-debug data ever has more than one slot.
  for (uint32_t slot = 0; slot < op_debug.size(); slot++) {
    uint32_t index = 0;
    // parse DHA Atomic Add info
    nlohmann::json dha_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kDhaAtomicAddInfoSize;
    // parse L2 Atomic Add info
    nlohmann::json l2_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kL2AtomicAddInfoSize;
    // parse AICore info
    nlohmann::json ai_core_info = ParseOverflowInfo(data_ptr + index);
    index += kAiCoreInfoSize;
    // parse DHA Atomic Add status
    dha_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kDhaAtomicAddStatusSize;
    // parse L2 Atomic Add status
    l2_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kL2AtomicAddStatusSize;
    // parse AICore status: three uint64 words (kernel code, block index, status)
    uint64_t kernel_code = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t block_idx = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t status = UnpackUint64Value(data_ptr + index);
    ai_core_info["kernel_code"] = IntToHexString(kernel_code);
    ai_core_info["block_idx"] = block_idx;
    ai_core_info["status"] = status;
    // Assemble the three sections into one JSON document.
    nlohmann::json opdebug_data;
    opdebug_data["DHA Atomic Add"] = dha_atomic_add_info;
    opdebug_data["L2 Atomic Add"] = l2_atomic_add_info;
    opdebug_data["AI Core"] = ai_core_info;
    // save json to file
    DumpToFile(out_path + std::to_string(slot) + ".json", opdebug_data.dump());
  }
}
  970. #endif // ENABLE_D
  971. } // namespace mindspore