You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

e2e_dump.cc 44 kB

5 years ago
4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015
  1. /**
  2. * Copyright 2020-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/data_dump/e2e_dump.h"
  17. #include <unistd.h>
  18. #include <sstream>
  19. #include <algorithm>
  20. #include <map>
  21. #include <memory>
  22. #include <set>
  23. #include <utility>
  24. #include <vector>
  25. #include "debug/data_dump/dump_json_parser.h"
  26. #include "runtime/device/ms_device_shape_transfer.h"
  27. #include "include/common/debug/anf_dump_utils.h"
  28. #include "include/common/debug/common.h"
  29. #include "backend/common/session/anf_runtime_algorithm.h"
  30. #include "include/common/utils/anfalgo.h"
  31. #include "utils/ms_context.h"
  32. #include "runtime/device/kernel_runtime_manager.h"
  33. #include "include/common/utils/config_manager.h"
  34. #include "utils/file_utils.h"
  35. #include "debug/data_dump/tensor_stat_dump.h"
  36. #include "abstract/utils.h"
  37. #include "runtime/hardware/device_context_manager.h"
  38. #ifdef ENABLE_DEBUGGER
  39. #include "debug/debug_services.h"
  40. #include "debug/tensor_load.h"
  41. #include "debug/debugger/debugger.h"
  42. #endif
  43. namespace mindspore {
#ifdef ENABLE_D
// Shorthand aliases for the tensor-format / data-type enums declared in the Ascend dump proto.
using ProtoFormat = debugger::dump::OutputFormat;
using ProtoDataType = debugger::dump::OutputDataType;
// Fixed buffer sizes (bytes) for Ascend overflow/atomic-add debug info blocks.
// NOTE(review): the consumers of these constants are outside this chunk — confirm values against them.
constexpr int kDhaAtomicAddInfoSize = 128;
constexpr int kL2AtomicAddInfoSize = 128;
constexpr int kAiCoreInfoSize = 256;
constexpr int kDhaAtomicAddStatusSize = 256;
constexpr int kL2AtomicAddStatusSize = 256;
constexpr int kUint64Size = sizeof(uint64_t);
// Device/host format pairs for which host-side format transformation is supported.
const std::set<std::pair<std::string, std::string>> kSuppTransFormatPair = {
  // {device format, host format}
  {kOpFormat_FRAC_Z, kOpFormat_NCHW},      {kOpFormat_FRAC_NZ, kOpFormat_NCHW},
  {kOpFormat_NC1HWC0, kOpFormat_NCHW},     {kOpFormat_C1HWNCoC0, kOpFormat_NCHW},
  {kOpFormat_NC1HWC0_C04, kOpFormat_NCHW}, {kOpFormat_NDC1HWC0, kOpFormat_NCHW},
  {kOpFormat_FRACTAL_Z_3D, kOpFormat_NCHW}};
// Maps a proto format enum to the corresponding MindSpore format string constant.
const std::map<ProtoFormat, std::string> kFormatToStringMap = {
  {ProtoFormat::FORMAT_NCHW, kOpFormat_NCHW},
  {ProtoFormat::FORMAT_NHWC, kOpFormat_NHWC},
  {ProtoFormat::FORMAT_ND, kOpFormat_ND},
  {ProtoFormat::FORMAT_NC1HWC0, kOpFormat_NC1HWC0},
  {ProtoFormat::FORMAT_FRACTAL_Z, kOpFormat_FRAC_Z},
  {ProtoFormat::FORMAT_NC1HWC0_C04, kOpFormat_NC1HWC0_C04},
  {ProtoFormat::FORMAT_FRACTAL_Z_C04, kOpFormat_FRACTAL_Z_C04},
  {ProtoFormat::FORMAT_NC1KHKWHWC0, kOpFormat_NC1KHKWHWC0},
  {ProtoFormat::FORMAT_HWCN, kOpFormat_HWCN},
  {ProtoFormat::FORMAT_NDHWC, kOpFormat_NDHWC},
  {ProtoFormat::FORMAT_NCDHW, kOpFormat_NCDHW},
  {ProtoFormat::FORMAT_DHWCN, kOpFormat_DHWCN},
  {ProtoFormat::FORMAT_DHWNC, kOpFormat_DHWNC},
  {ProtoFormat::FORMAT_NDC1HWC0, kOpFormat_NDC1HWC0},
  {ProtoFormat::FORMAT_FRACTAL_Z_3D, kOpFormat_FRACTAL_Z_3D},
  {ProtoFormat::FORMAT_C1HWNCoC0, kOpFormat_C1HWNCoC0},
  {ProtoFormat::FORMAT_FRACTAL_NZ, kOpFormat_FRAC_NZ},
  {ProtoFormat::FORMAT_FRACTAL_ZN_LSTM, kOpFormat_FRACTAL_ZN_LSTM}};
// Maps a proto data-type enum to the corresponding MindSpore TypeId.
const std::map<ProtoDataType, mindspore::TypeId> kDataTypetoMSTypeMap = {
  {ProtoDataType::DT_UNDEFINED, mindspore::TypeId::kTypeUnknown},
  {ProtoDataType::DT_FLOAT, mindspore::TypeId::kNumberTypeFloat32},
  {ProtoDataType::DT_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
  {ProtoDataType::DT_INT8, mindspore::TypeId::kNumberTypeInt8},
  {ProtoDataType::DT_UINT8, mindspore::TypeId::kNumberTypeUInt8},
  {ProtoDataType::DT_INT16, mindspore::TypeId::kNumberTypeInt16},
  {ProtoDataType::DT_UINT16, mindspore::TypeId::kNumberTypeUInt16},
  {ProtoDataType::DT_INT32, mindspore::TypeId::kNumberTypeInt32},
  {ProtoDataType::DT_INT64, mindspore::TypeId::kNumberTypeInt64},
  {ProtoDataType::DT_UINT32, mindspore::TypeId::kNumberTypeUInt32},
  {ProtoDataType::DT_UINT64, mindspore::TypeId::kNumberTypeUInt64},
  {ProtoDataType::DT_BOOL, mindspore::TypeId::kNumberTypeBool},
  {ProtoDataType::DT_DOUBLE, mindspore::TypeId::kNumberTypeFloat64},
  {ProtoDataType::DT_STRING, mindspore::TypeId::kObjectTypeString}};
#endif
  94. std::string GenDataFilePath(const CNodePtr &node, const std::string &kernel_name, const std::string &dump_path,
  95. size_t slot, bool is_input) {
  96. std::string op_type = common::AnfAlgo::GetCNodeName(node);
  97. std::string op_name = GetOpNameWithoutScope(kernel_name);
  98. uint64_t timestamp = GetTimeStamp();
  99. uint32_t task_id = 0;
  100. uint32_t stream_id = 0;
  101. std::string tensor_type = is_input ? ".input." : ".output.";
  102. std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
  103. std::to_string(stream_id) + '.' + std::to_string(timestamp) + tensor_type +
  104. std::to_string(slot);
  105. return file_path;
  106. }
  107. bool E2eDump::IsDeviceTargetGPU() {
  108. auto context = MsContext::GetInstance();
  109. MS_EXCEPTION_IF_NULL(context);
  110. return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
  111. }
  112. /*
  113. * Feature group: Dump.
  114. * Target device group: GPU.
  115. * Runtime category: Old runtime, MindRT.
  116. * Description: This function is for dumping tensor in memory to disk in GPU machine.
  117. */
  118. void E2eDump::DumpGPUMemToFile(const Debugger *debugger, const std::string &file_path, bool trans_flag,
  119. const device::DeviceAddress &addr, const std::string &original_kernel_name, size_t slot,
  120. const ShapeVector &int_shapes, const TypeId &host_type) {
  121. #ifdef ENABLE_DEBUGGER
  122. auto format = kOpFormat_DEFAULT;
  123. MS_EXCEPTION_IF_NULL(debugger);
  124. auto ret = debugger->DumpTensorToFile(file_path, trans_flag, format, addr.format(), original_kernel_name, slot,
  125. int_shapes, host_type);
  126. if (!ret) {
  127. MS_LOG(INFO) << "DumpTensorToFile Failed: flag:" << trans_flag << ", path:" << file_path
  128. << ", host_format:" << format;
  129. }
  130. #endif
  131. }
  132. void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  133. MS_EXCEPTION_IF_NULL(graph);
  134. auto &dump_json_parser = DumpJsonParser::GetInstance();
  135. if (!dump_json_parser.OutputNeedDump()) {
  136. return;
  137. }
  138. MS_LOG(INFO) << "Start e2e dump output";
  139. bool trans_flag = dump_json_parser.trans_flag();
  140. const auto &apply_kernels = graph->execution_order();
  141. for (const auto &node : apply_kernels) {
  142. MS_EXCEPTION_IF_NULL(node);
  143. std::string kernel_name = GetKernelNodeName(node);
  144. if (!dump_json_parser.NeedDump(kernel_name)) {
  145. continue;
  146. }
  147. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  148. DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  149. }
  150. }
  151. void E2eDump::DumpOutputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  152. auto &dump_json_parser = DumpJsonParser::GetInstance();
  153. if (!dump_json_parser.OutputNeedDump()) {
  154. return;
  155. }
  156. bool trans_flag = dump_json_parser.trans_flag();
  157. MS_EXCEPTION_IF_NULL(node);
  158. std::string kernel_name = GetKernelNodeName(node);
  159. if (!dump_json_parser.NeedDump(kernel_name)) {
  160. return;
  161. }
  162. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  163. DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  164. }
// Dumps every existing output slot of `node` under `dump_path`. Depending on the json
// configuration this writes tensor statistics (GPU / Ascend kernel-by-kernel only),
// the tensor data itself, or both. `kernel_name` is rewritten in place to its
// file-name-safe form.
void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto output_size = common::AnfAlgo::GetOutputTensorNum(node);
  for (size_t j = 0; j < output_size; ++j) {
    if (!AnfAlgo::OutputAddrExist(node, j)) {
      // Output slot has no device address; nothing to dump for it.
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(node, j);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = GetOpNameWithoutScope(*kernel_name);
    // Task/stream ids are fixed to 0 on this path; the timestamp is taken once per slot
    // so the statistics entry and the data file refer to the same dump instant.
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    uint64_t timestamp = GetTimeStamp();
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
                            std::to_string(j);
    // Statistics dump is only supported on GPU and Ascend kernel-by-kernel mode.
    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag()) ) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, false, j, j);
      (void)stat_dump.DumpTensorStatsToFile(GetKernelNodeName(node), dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsDeviceTargetGPU()) {
        // GPU tensors are fetched through the debugger's tensor cache.
        DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, GetKernelNodeName(node), j, int_shapes, type);
      } else {
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}
  201. void E2eDump::DumpOutputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
  202. std::string *kernel_name) {
  203. auto debugger = Debugger::GetInstance();
  204. MS_EXCEPTION_IF_NULL(debugger);
  205. if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
  206. MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
  207. return;
  208. }
  209. MS_EXCEPTION_IF_NULL(node);
  210. GetFileKernelName(NOT_NULL(kernel_name));
  211. auto output_size = common::AnfAlgo::GetOutputTensorNum(node);
  212. for (size_t j = 0; j < output_size; ++j) {
  213. if (!AnfAlgo::OutputAddrExist(node, j)) {
  214. continue;
  215. }
  216. auto addr = AnfAlgo::GetOutputAddr(node, j);
  217. MS_EXCEPTION_IF_NULL(addr);
  218. ShapeVector int_shapes;
  219. GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
  220. auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
  221. std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, false);
  222. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  223. }
  224. }
  225. void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  226. MS_EXCEPTION_IF_NULL(graph);
  227. auto &dump_json_parser = DumpJsonParser::GetInstance();
  228. if (!dump_json_parser.InputNeedDump()) {
  229. return;
  230. }
  231. MS_LOG(INFO) << "Start e2e dump input";
  232. bool trans_flag = dump_json_parser.trans_flag();
  233. const auto &apply_kernels = graph->execution_order();
  234. for (const auto &node : apply_kernels) {
  235. MS_EXCEPTION_IF_NULL(node);
  236. std::string kernel_name = GetKernelNodeName(node);
  237. if (!dump_json_parser.NeedDump(kernel_name)) {
  238. continue;
  239. }
  240. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  241. DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  242. }
  243. }
  244. void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger,
  245. const KernelLaunchInfo *launch_info) {
  246. auto &dump_json_parser = DumpJsonParser::GetInstance();
  247. if (!dump_json_parser.InputNeedDump()) {
  248. return;
  249. }
  250. bool trans_flag = dump_json_parser.trans_flag();
  251. MS_EXCEPTION_IF_NULL(node);
  252. std::string kernel_name = GetKernelNodeName(node);
  253. if (!dump_json_parser.NeedDump(kernel_name)) {
  254. return;
  255. }
  256. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  257. DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger, launch_info);
  258. }
// Wraps the `index`-th kernel-launch input buffer in a freshly created Ascend DeviceAddress
// so the generic dump path can read it. Used in Ascend kernel-by-kernel mode, where the
// graph node's cached address may not be the one actually launched.
// NOTE(review): `index` is not bounds-checked against launch_info->inputs_ here —
// callers appear responsible for passing a valid slot; confirm.
std::shared_ptr<device::DeviceAddress> CreateAscendDeviceAddress(const KernelLaunchInfo *launch_info, size_t index,
                                                                 TypeId type) {
  MS_EXCEPTION_IF_NULL(launch_info);
  auto addr_ptr = launch_info->inputs_[index];
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
  auto device_context =
    device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({kAscendDevice, device_id});
  auto format = kOpFormat_DEFAULT;
  MS_EXCEPTION_IF_NULL(addr_ptr);
  // Shape is left empty: the dump caller supplies shapes separately.
  return device_context->CreateDeviceAddress(addr_ptr->addr, addr_ptr->size, format, type, ShapeVector());
}
// Dumps every input of `node` under `dump_path`. Each input is resolved to the producing
// node's output address. On GPU / Ascend kernel-by-kernel mode the tensor is identified
// by the producing kernel's name with slot 0; otherwise by this node's name with slot j.
// `launch_info` (may be null) supplies real input buffers in Ascend kernel-by-kernel mode.
void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name, const Debugger *debugger, const KernelLaunchInfo *launch_info) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    // Resolve input j to the (producer node, output index) pair that feeds it.
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    std::string tensor_name = GetKernelNodeName(node);
    size_t slot = j;
    if (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
      // In these modes the debugger tracks tensors under the producing kernel's
      // name, whose output slot is always reported as 0.
      auto input_kernel = node->input(j + 1);
      std::string input_kernel_name = GetKernelNodeName(input_kernel);
      tensor_name = input_kernel_name;
      slot = 0;
    }
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = GetOpNameWithoutScope(*kernel_name);
    // Task/stream ids are fixed to 0 on this path; the timestamp is shared by the
    // statistics entry and the data file for this input.
    uint64_t timestamp = GetTimeStamp();
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, true, j, slot);
      (void)stat_dump.DumpTensorStatsToFile(tensor_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsDeviceTargetGPU()) {
        DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, tensor_name, slot, int_shapes, type);
      } else if (Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
        // load address from launch_info when it's Ascend Kernel by kernel mode.
        auto ascend_device_addr = CreateAscendDeviceAddress(launch_info, j, type);
        DumpMemToFile(file_path, *ascend_device_addr, int_shapes, type, trans_flag);
      } else {
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}
  322. void E2eDump::DumpInputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
  323. std::string *kernel_name) {
  324. auto debugger = Debugger::GetInstance();
  325. MS_EXCEPTION_IF_NULL(debugger);
  326. if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
  327. MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
  328. return;
  329. }
  330. MS_EXCEPTION_IF_NULL(node);
  331. GetFileKernelName(NOT_NULL(kernel_name));
  332. auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  333. for (size_t j = 0; j < input_size; ++j) {
  334. auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
  335. auto input = kernel_with_index.first;
  336. auto index = kernel_with_index.second;
  337. if (!AnfAlgo::OutputAddrExist(input, index)) {
  338. continue;
  339. }
  340. auto addr = AnfAlgo::GetOutputAddr(input, index);
  341. MS_EXCEPTION_IF_NULL(addr);
  342. ShapeVector int_shapes;
  343. GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
  344. auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
  345. std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, true);
  346. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  347. }
  348. }
// Dumps a single Parameter or ValueNode (constant) output to `dump_path`.
// Non-parameter/non-constant nodes and string constants are skipped. Constant node
// names are expected to carry the "Default--" scope prefix, which is stripped for
// the file name; constants are always dumped without format transformation.
void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                bool trans_flag, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if ((!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) || IsValueNode<StringImm>(anf_node)) {
    return;
  }
  std::string node_name = GetKernelNodeName(anf_node);
  if (!dump_json_parser.NeedDump(node_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  std::string dump_name = node_name;
  const std::string cst_prefix = "Default--";
  if (anf_node->isa<ValueNode>()) {
    // Constants must be named "Default--<name>"; anything else is logged and skipped.
    if (dump_name.find(cst_prefix) == std::string::npos) {
      MS_LOG(INFO) << "Incorrect constant format: " << dump_name;
      return;
    }
    dump_name = node_name.substr(cst_prefix.length());
    // Constants are dumped as-is, never format-transformed.
    trans_flag = false;
  }
  // check if output address exists, if not, return;
  if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
    return;
  }
  auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
  MS_EXCEPTION_IF_NULL(addr);
  ShapeVector int_shapes;
  GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
  auto type = common::AnfAlgo::GetOutputInferDataType(anf_node, output_index);
  // Task/stream ids are fixed to 0; parameters/constants always dump as output slot 0.
  uint64_t timestamp = GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + dump_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, node_name, 0, int_shapes, type);
    }
  } else {
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is similar to DumpSingleAnfNode function but it is only for dumping parameters in mindRT.
 * This function uses GetParameterInfo to get dump info for the parameter node.
 */
void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag,
                                      const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  std::string node_name = GetKernelNodeName(anf_node);
  // Only Parameter nodes that pass the name filter are dumped, and only when output dump is on.
  if (!anf_node->isa<Parameter>() || !dump_json_parser.NeedDump(node_name) || !dump_json_parser.OutputNeedDump()) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  ShapeVector int_shapes;
  // type/device_type are out-parameters filled by GetParameterInfo.
  TypeId type;
  TypeId device_type;
  auto addr = GetParameterInfo(anf_node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type));
  if (addr == nullptr) {
    MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for mindRT.";
    return;
  }
  // Task/stream ids are fixed to 0; parameters always dump as output slot 0.
  uint64_t timestamp = GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + node_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, node_name, 0, int_shapes, type);
    }
  } else {
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}
  440. void E2eDump::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path,
  441. const Debugger *debugger) {
  442. MS_EXCEPTION_IF_NULL(graph);
  443. auto &dump_json_parser = DumpJsonParser::GetInstance();
  444. if (!dump_json_parser.OutputNeedDump()) {
  445. return;
  446. }
  447. MS_LOG(INFO) << "Start e2e dump parameters";
  448. bool trans_flag = dump_json_parser.trans_flag();
  449. // dump parameters
  450. const auto &parameters = graph->inputs();
  451. for (auto &item : parameters) {
  452. DumpSingleAnfNode(item, PARAMETER_OUTPUT_INDEX, dump_path, trans_flag, debugger);
  453. }
  454. }
  455. void E2eDump::DumpConstantData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  456. MS_EXCEPTION_IF_NULL(graph);
  457. auto &dump_json_parser = DumpJsonParser::GetInstance();
  458. if (!IsDeviceTargetGPU() || !dump_json_parser.e2e_dump_enabled()) {
  459. return;
  460. }
  461. uint32_t graph_id = graph->graph_id();
  462. std::string cst_path = GenerateDumpPath(graph_id, rank_id, true);
  463. if (!Common::FileExists(cst_path)) {
  464. DumpConstantData(graph, cst_path, debugger);
  465. }
  466. }
  467. void E2eDump::DumpConstantData(const session::KernelGraph *graph, const std::string &cst_dump_path,
  468. const Debugger *debugger) {
  469. // Dump constant to npy file
  470. MS_EXCEPTION_IF_NULL(graph);
  471. auto &dump_json_parser = DumpJsonParser::GetInstance();
  472. MS_LOG(INFO) << "DumpConstants. Current iteration is " << dump_json_parser.cur_dump_iter();
  473. MS_LOG(INFO) << "Current graph id is " << graph->graph_id();
  474. if (!dump_json_parser.OutputNeedDump()) {
  475. return;
  476. }
  477. const auto value_nodes = graph->graph_value_nodes();
  478. for (auto &item : value_nodes) {
  479. DumpSingleAnfNode(item, VALUE_NODE_OUTPUT_INDEX, cst_dump_path, false, debugger);
  480. }
  481. }
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime.
 * Description: This function is for updating dump iteration for GPU and ascend old runtime.
 */
void E2eDump::UpdateIterOldRTDump(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  uint32_t graph_id = graph->graph_id();
  if (IsDeviceTargetGPU()) {
    // starting_graph_id (declared outside this chunk, sentinel INT32_MAX) records the
    // first graph seen; the iteration counter advances each time that graph re-runs.
    if (starting_graph_id == INT32_MAX) {
      starting_graph_id = graph_id;
    } else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      // Update dump iter for mindrt runtime is done using UpdateIterGPUDump().
      // Update dump iter for GPU old runtime.
      dump_json_parser.UpdateDumpIter();
    }
    return;
  }
  // If device target is Ascend
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "No need to update iteration for dataset graph.";
    return;
  }
  // In multi network scripts, dump iter is equal to the number of networks that have been executed so far.
  dump_json_parser.UpdateDumpIter();
}
  510. /*
  511. * Feature group: Dump.
  512. * Target device group: Ascend, GPU.
  513. * Runtime category: MindRT.
  514. * Description: This function is for updating dump iteration for GPU and ascend MindRT dump. Please note that dump with
  515. * dataset_sink_mode = True is not supported for GPU.
  516. */
  517. void E2eDump::UpdateIterMindRTDump() {
  518. auto debugger = Debugger::GetInstance();
  519. // Dataset graph is always the first graph in the list when dataset_sink_mode is true.
  520. auto graph = (debugger->GetStepGraphPtrList())[0];
  521. auto context = MsContext::GetInstance();
  522. MS_EXCEPTION_IF_NULL(context);
  523. if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice && graph->IsDatasetGraph()) {
  524. MS_LOG(INFO) << "No need to update iteration for dataset graph.";
  525. return;
  526. }
  527. // update dump iter for GPU and kernel by kernel ascend dump.
  528. DumpJsonParser::GetInstance().UpdateDumpIter();
  529. }
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Generates graph history files (dumping all the iteration numbers in which the graph was executed) for
 * the given graph and rank_id. If dataset_sink_mode is true for async dump in ascend, this function is called once per
 * each epoch and dumps all the iterations in the epoch to the graph history file.
 */
void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
  auto &json_parser = DumpJsonParser::GetInstance();
  if (!(json_parser.async_dump_enabled() || json_parser.e2e_dump_enabled())) {
    return;
  }
  bool sink_mode = (ConfigManager::GetInstance().dataset_mode() || graph->IsDatasetGraph());
  auto iter_num = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
  if (graph->IsDatasetGraph()) {
    // Dataset graphs produce no per-iteration history.
    MS_LOG(INFO) << "graph: " << graph->graph_id() << " is dataset graph, not creating graph history file.";
    return;
  }
  if (!IsDeviceTargetGPU() && (graph->graph_id() != graph->root_graph_id())) {
    // when device target is ascend, we only dump graph run iter for the root graph.
    return;
  }
  // History file lives at <dump>/rank_<id>/execution_order/ms_global_execution_order_graph_<gid>.csv.
  std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/";
  std::string graph_str =
    IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
  std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv";
  auto real_path = Common::CreatePrefixPath(file_name_to_check);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Check file path: " << file_name_to_check << " failed.";
    return;
  }
  std::string file_name = real_path.value();
  // Temporarily make the file writable, append, then restore it to read-only.
  ChangeFileMode(file_name, S_IWUSR);
  std::ofstream fout(file_name, std::ofstream::app);
  if (!fout.is_open()) {
    MS_LOG(WARNING) << "Open file for saving graph global execution order failed.";
    return;
  }
  if (sink_mode && json_parser.async_dump_enabled() && !Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
    // for async dump when sink_mode = true, cur_dump_iter() = current_epoch
    // dump history for all iterations in the epoch
    Debugger::GetInstance()->UpdateGraphIterMap(graph->graph_id(), iter_num);
    auto graph_iter_map = Debugger::GetInstance()->GetGraphIterMap();
    auto step_per_epoch = IntToSize(graph_iter_map[graph->graph_id()]);
    for (size_t i = 0; i < step_per_epoch; i++) {
      auto step = (json_parser.cur_dump_iter() * step_per_epoch) + i;
      fout << (std::to_string(step) + "\n");
    }
  } else {
    fout << std::to_string(json_parser.cur_dump_iter()) + "\n";
  }
  fout.close();
  ChangeFileMode(file_name, S_IRUSR);
}
  585. /*
  586. * Feature group: Dump.
  587. * Target device group: Ascend, GPU.
  588. * Runtime category: Old runtime, MindRT.
  589. * Description: This function is for dumping the whole graph. It is used for old runtime in GPU and Ascend and
  590. * super-kernel mindRT in Ascend.
  591. */
  592. void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  593. MS_EXCEPTION_IF_NULL(graph);
  594. bool success = false;
  595. auto &dump_json_parser = DumpJsonParser::GetInstance();
  596. uint32_t graph_id = graph->graph_id();
  597. if (!dump_json_parser.e2e_dump_enabled()) {
  598. return;
  599. }
  600. if (dump_json_parser.GetIterDumpFlag()) {
  601. MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
  602. MS_LOG(INFO) << "Current graph id is " << graph_id;
  603. std::string dump_path = GenerateDumpPath(graph_id, rank_id);
  604. if (dump_json_parser.IsStatisticDump()) {
  605. (void)TensorStatDump::OpenStatisticsFile(dump_path);
  606. }
  607. DumpInput(graph, dump_path, debugger);
  608. DumpOutput(graph, dump_path, debugger);
  609. if (!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
  610. // Dump parameters for old runtime. For mindRT it is done in PostExecuteGraphDebugger.
  611. DumpParameters(graph, dump_path, debugger);
  612. // DumpConstantData for GPU old runtime.
  613. DumpConstantData(graph, rank_id, debugger);
  614. }
  615. if (dump_json_parser.IsStatisticDump()) {
  616. CsvWriter::GetInstance().CloseFile();
  617. }
  618. success = true;
  619. }
  620. if (success) {
  621. MS_LOG(DEBUG) << "E2eDump Dump Data completed!";
  622. } else {
  623. MS_LOG(DEBUG) << "E2eDump Dump has not occurred!";
  624. }
  625. }
  626. /*
  627. * Feature group: Dump.
  628. * Target device group: Ascend, GPU.
  629. * Runtime category: MindRT.
  630. * Description: This function is for dumping a single node. It is used for mindrt in GPU and Ascend kernel-by-kernel.
  631. */
  632. bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger,
  633. const KernelLaunchInfo *launch_info) {
  634. bool success = false;
  635. auto &dump_json_parser = DumpJsonParser::GetInstance();
  636. if (dump_json_parser.DumpEnabledForIter()) {
  637. std::string dump_path = GenerateDumpPath(graph_id, rank_id);
  638. DumpInputSingleNode(node, dump_path, debugger, launch_info);
  639. DumpOutputSingleNode(node, dump_path, debugger);
  640. success = true;
  641. }
  642. return success;
  643. }
  644. /*
  645. * Feature group: Dump.
  646. * Target device group: Ascend, GPU.
  647. * Runtime category: MindRT.
  648. * Description: This function is for dumping all the parameters in the current root graph for GPU, Ascend superkernel
  649. * (e2e dump) and Ascend kernel-by-kernel (e2e and async dump).
  650. */
  651. void E2eDump::DumpParametersData(uint32_t rank_id, const Debugger *debugger) {
  652. uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
  653. auto &dump_json_parser = DumpJsonParser::GetInstance();
  654. if (dump_json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) {
  655. // Dump parameters for mindRT in async dump only for kernel by kernel mode.
  656. return;
  657. }
  658. if (dump_json_parser.DumpEnabledForIter()) {
  659. MS_LOG(INFO) << "DumpParameters. Current iteration is " << dump_json_parser.cur_dump_iter();
  660. MS_LOG(INFO) << "Current root graph id is " << root_graph_id;
  661. std::string dump_path = GenerateDumpPath(root_graph_id, rank_id);
  662. bool trans_flag = dump_json_parser.trans_flag();
  663. for (auto &item : debugger->GetParametersMindRT()) {
  664. DumpSingleParameterNode(item, dump_path, trans_flag, debugger);
  665. }
  666. }
  667. }
  668. #ifdef ENABLE_D
  669. template <typename T>
  670. dump_data_t ParseAttrsFromDumpData(const std::string &dump_path, char *data_ptr, const T &tensor, const std::string &io,
  671. uint32_t slot) {
  672. // get data type
  673. auto iter_dtype = kDataTypetoMSTypeMap.find(tensor.data_type());
  674. if (iter_dtype == kDataTypetoMSTypeMap.end()) {
  675. MS_LOG(INFO) << "Unsupported data type for tensor " << dump_path << ": unknown(" << tensor.data_type() << ")";
  676. return dump_data_t{};
  677. }
  678. auto data_type = iter_dtype->second;
  679. // get format
  680. auto iter_fmt = kFormatToStringMap.find(tensor.format());
  681. if (iter_fmt == kFormatToStringMap.end()) {
  682. MS_LOG(INFO) << "Unsupported tensor format for tensor " << dump_path << ": unknown(" << tensor.format() << ")";
  683. return dump_data_t{};
  684. }
  685. std::string device_format = iter_fmt->second;
  686. // get shape
  687. ShapeVector shape_d;
  688. (void)std::transform(tensor.shape().dim().begin(), tensor.shape().dim().end(), std::back_inserter(shape_d),
  689. SizeToLong);
  690. ShapeVector shape_to;
  691. (void)std::transform(tensor.original_shape().dim().begin(), tensor.original_shape().dim().end(),
  692. std::back_inserter(shape_to), SizeToLong);
  693. // get size and sub_format
  694. size_t data_size = (size_t)tensor.size();
  695. int32_t sub_format = tensor.sub_format();
  696. return dump_data_t{dump_path, data_ptr, data_type, device_format, shape_d, shape_to, data_size, sub_format, io, slot};
  697. }
  698. /*
  699. * Feature group: Dump.
  700. * Target device group: Ascend.
  701. * Runtime category: Old runtime, MindRT.
  702. * Description: This function is for ascend A+M dump only. It parses and converts each slot of tensor in DumpData object
  703. * and dump the tensor data in npy file or statistic data in csv file.
  704. */
  705. void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
  706. char *data_ptr) {
  707. std::vector<dump_data_t> dump_tensor_vec;
  708. // dump input tensors
  709. std::vector<debugger::dump::OpInput> input_tensors(dump_data.input().begin(), dump_data.input().end());
  710. uint64_t offset = 0;
  711. for (uint32_t slot = 0; slot < input_tensors.size(); slot++) {
  712. auto in_tensor = input_tensors[slot];
  713. dump_tensor_vec.push_back(ParseAttrsFromDumpData(dump_path, data_ptr + offset, in_tensor, "input", slot));
  714. offset += in_tensor.size();
  715. }
  716. // dump output tensors
  717. std::vector<debugger::dump::OpOutput> output_tensors(dump_data.output().begin(), dump_data.output().end());
  718. for (uint32_t slot = 0; slot < output_tensors.size(); slot++) {
  719. auto out_tensor = output_tensors[slot];
  720. dump_tensor_vec.push_back(ParseAttrsFromDumpData(dump_path, data_ptr + offset, out_tensor, "output", slot));
  721. offset += out_tensor.size();
  722. }
  723. // assign slot conversion task to different thread.
  724. if (dump_tensor_vec.empty()) {
  725. return;
  726. }
  727. constexpr int kMaxTensorSize = 1048576;
  728. if (offset <= kMaxTensorSize) {
  729. // If the total tensor size is less than 1Mb, do it in single thread.
  730. ConvertFormatForTensors(&dump_tensor_vec, 0, dump_tensor_vec.size() - 1);
  731. } else {
  732. // In multi_thread process, we only use 1/4 of the total concurrent threads.
  733. uint32_t ratio_divider = 4;
  734. auto default_num_workers = std::max<uint32_t>(1, std::thread::hardware_concurrency() / ratio_divider);
  735. auto num_threads = std::min<uint32_t>(default_num_workers, dump_tensor_vec.size());
  736. uint32_t task_size = dump_tensor_vec.size() / num_threads;
  737. uint32_t remainder = dump_tensor_vec.size() % num_threads;
  738. std::vector<std::thread> threads;
  739. threads.reserve(num_threads);
  740. MS_LOG(INFO) << "Number of threads used for A+M dump: " << num_threads;
  741. for (size_t t = 0; t < threads.capacity(); t++) {
  742. uint32_t start_idx = t * task_size;
  743. uint32_t end_idx = start_idx + task_size - 1;
  744. if (t == num_threads - 1) {
  745. end_idx += remainder;
  746. }
  747. threads.emplace_back(std::thread(&E2eDump::ConvertFormatForTensors, &dump_tensor_vec, start_idx, end_idx));
  748. }
  749. for (auto &thd : threads) {
  750. if (thd.joinable()) {
  751. thd.join();
  752. }
  753. }
  754. }
  755. for (auto &dump_tensor_item : dump_tensor_vec) {
  756. (void)DumpTensorStatsIfNeeded(dump_tensor_item);
  757. }
  758. }
  759. void E2eDump::ConvertFormatForTensors(std::vector<dump_data_t> *dump_tensor_vec, uint32_t start_idx, uint32_t end_idx) {
  760. for (uint32_t idx = start_idx; idx <= end_idx; idx++) {
  761. auto &dump_data_obj = dump_tensor_vec->at(idx);
  762. auto succ = ConvertFormatForOneTensor(&dump_data_obj);
  763. if (!succ) {
  764. MS_LOG(INFO) << "Failed to convert format for tensor " << dump_data_obj.dump_file_path << "."
  765. << dump_data_obj.in_out_str << "." << dump_data_obj.slot;
  766. }
  767. (void)DumpTensorDataIfNeeded(dump_data_obj);
  768. }
  769. }
  770. /*
  771. * Feature group: Dump.
  772. * Target device group: Ascend.
  773. * Runtime category: Old runtime, MindRT.
  774. * Description: It serves for A+M dump. Save tensor into dump path as configured.
  775. */
  776. bool E2eDump::DumpTensorDataIfNeeded(const dump_data_t &dump_tensor_info) {
  777. if (!DumpJsonParser::GetInstance().IsTensorDump()) {
  778. return true;
  779. }
  780. // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp
  781. std::ostringstream dump_path_ss;
  782. dump_path_ss << dump_tensor_info.dump_file_path << "." << dump_tensor_info.in_out_str << "." << dump_tensor_info.slot
  783. << "." << dump_tensor_info.format;
  784. std::string dump_path_slot = dump_path_ss.str();
  785. std::shared_ptr<tensor::Tensor> trans_buf = dump_tensor_info.trans_buf;
  786. bool dump_succ = false;
  787. if (trans_buf) {
  788. dump_succ = DumpJsonParser::DumpToFile(dump_path_slot, trans_buf->data_c(), trans_buf->Size(),
  789. dump_tensor_info.host_shape, dump_tensor_info.data_type);
  790. } else {
  791. dump_succ = DumpJsonParser::DumpToFile(dump_path_slot, dump_tensor_info.data_ptr, dump_tensor_info.data_size,
  792. dump_tensor_info.host_shape, dump_tensor_info.data_type);
  793. }
  794. return dump_succ;
  795. }
  796. /*
  797. * Feature group: Dump.
  798. * Target device group: Ascend.
  799. * Runtime category: Old runtime, MindRT.
  800. * Description: It serves for A+M dump. Save statistic of the tensor data into dump path as configured.
  801. */
bool E2eDump::DumpTensorStatsIfNeeded(const dump_data_t &dump_tensor_info) {
  // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp
  if (!DumpJsonParser::GetInstance().IsStatisticDump()) {
    return true;
  }
  std::string dump_path = dump_tensor_info.dump_file_path;
  // Split the directory from the file name; the file name encodes the op identity.
  size_t pos = dump_path.rfind("/");
  std::string file_name = dump_path.substr(pos + 1);
  // Locate the four separating dots. first_dot is searched from the front and the other three
  // from the back, so op_name itself may contain dots (everything between the first and the
  // second-from-last group of dots belongs to op_name).
  size_t first_dot = file_name.find(".");
  size_t fourth_dot = file_name.rfind(".");
  size_t third_dot = file_name.rfind(".", fourth_dot - 1);
  size_t second_dot = file_name.rfind(".", third_dot - 1);
  // Reject malformed names. NOTE(review): fourth_dot is not checked directly; a name with too
  // few dots appears to be caught via npos/equality on the other positions — confirm.
  if (first_dot == std::string::npos || second_dot == std::string::npos || third_dot == std::string::npos ||
      first_dot == second_dot) {
    MS_LOG(ERROR) << "Dump path " << dump_path << " received is not well formed";
    return false;
  }
  std::string op_type = file_name.substr(0, first_dot);
  std::string op_name = file_name.substr(first_dot + 1, second_dot - first_dot - 1);
  std::string task_id = file_name.substr(second_dot + 1, third_dot - second_dot - 1);
  std::string stream_id = file_name.substr(third_dot + 1, fourth_dot - third_dot - 1);
  std::string timestamp = file_name.substr(fourth_dot + 1);
  // The slot value is passed twice to satisfy TensorStatDump's (slot, root_graph?) signature —
  // see TensorStatDump for the exact meaning of the last two arguments.
  TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, dump_tensor_info.in_out_str,
                           dump_tensor_info.slot, dump_tensor_info.slot);
  std::shared_ptr<TensorData> data = std::make_shared<TensorData>();
  // Statistic dump supports numeric (non-complex) types only.
  if (dump_tensor_info.data_type <= TypeId::kNumberTypeBegin ||
      dump_tensor_info.data_type >= TypeId::kNumberTypeComplex64) {
    MS_LOG(ERROR) << "Data type of operator " << file_name << " is not supported by statistic dump";
    return false;
  }
  std::shared_ptr<tensor::Tensor> trans_buf = dump_tensor_info.trans_buf;
  // Prefer the host-format conversion buffer when available; otherwise use the raw payload.
  if (trans_buf) {
    data->SetByteSize(trans_buf->Size());
    data->SetDataPtr(static_cast<char *>(trans_buf->data_c()));
  } else {
    data->SetByteSize(dump_tensor_info.data_size);
    data->SetDataPtr(dump_tensor_info.data_ptr);
  }
  data->SetType(dump_tensor_info.data_type);
  data->SetShape(dump_tensor_info.host_shape);
  return stat_dump.DumpTensorStatsToFile(dump_path.substr(0, pos), data);
}
  844. /*
  845. * Feature group: Dump.
  846. * Target device group: Ascend.
  847. * Runtime category: Old runtime, MindRT.
  848. * Description: It serves for A+M dump. Convert tensor from device format to host format if needed.
  849. */
  850. bool E2eDump::ConvertFormatForOneTensor(dump_data_t *dump_tensor_info) {
  851. bool trans_success = false;
  852. auto trans_buf = std::make_shared<tensor::Tensor>(dump_tensor_info->data_type, dump_tensor_info->host_shape);
  853. // convert format to host format. It can be either NCHW or ND (non 4-dimemsions).
  854. const uint8_t kNumFourDim = 4;
  855. std::string host_format;
  856. std::string device_format = dump_tensor_info->format;
  857. if (dump_tensor_info->host_shape.size() == kNumFourDim) {
  858. host_format = kOpFormat_NCHW;
  859. } else {
  860. host_format = kOpFormat_ND;
  861. }
  862. if (device_format != host_format) {
  863. auto iter = kSuppTransFormatPair.find(std::make_pair(device_format, host_format));
  864. if (iter == kSuppTransFormatPair.end()) {
  865. MS_LOG(INFO) << "Do not support convert from format " << device_format << " to " << host_format << " for tensor "
  866. << dump_tensor_info->dump_file_path << "." << dump_tensor_info->in_out_str << "."
  867. << dump_tensor_info->slot;
  868. } else {
  869. const trans::FormatArgs format_args{dump_tensor_info->data_ptr,
  870. dump_tensor_info->data_size,
  871. host_format,
  872. device_format,
  873. dump_tensor_info->host_shape,
  874. dump_tensor_info->device_shape,
  875. dump_tensor_info->data_type};
  876. auto group = dump_tensor_info->sub_format > 1 ? dump_tensor_info->sub_format : 1;
  877. trans_success = trans::TransFormatFromDeviceToHost(format_args, trans_buf->data_c(), group);
  878. if (!trans_success) {
  879. MS_LOG(ERROR) << "Trans format failed.";
  880. }
  881. }
  882. }
  883. if (trans_success) {
  884. dump_tensor_info->format = host_format;
  885. dump_tensor_info->trans_buf = trans_buf;
  886. }
  887. return trans_success;
  888. }
  889. uint64_t UnpackUint64Value(char *ptr) {
  890. #if defined(__APPLE__)
  891. return *reinterpret_cast<const uint64_t *>(ptr);
  892. #else
  893. return le64toh(*reinterpret_cast<const uint64_t *>(ptr));
  894. #endif
  895. }
  896. std::string IntToHexString(const uint64_t value) {
  897. std::stringstream ss;
  898. ss << "0x" << std::hex << value;
  899. return ss.str();
  900. }
  901. nlohmann::json E2eDump::ParseOverflowInfo(char *data_ptr) {
  902. uint32_t index = 0;
  903. uint64_t model_id = UnpackUint64Value(data_ptr);
  904. index += kUint64Size;
  905. uint64_t stream_id = UnpackUint64Value(data_ptr + index);
  906. index += kUint64Size;
  907. uint64_t task_id = UnpackUint64Value(data_ptr + index);
  908. index += kUint64Size;
  909. uint64_t task_type = UnpackUint64Value(data_ptr + index);
  910. index += kUint64Size;
  911. uint64_t pc_start = UnpackUint64Value(data_ptr + index);
  912. index += kUint64Size;
  913. uint64_t para_base = UnpackUint64Value(data_ptr + index);
  914. nlohmann::json overflow_info;
  915. overflow_info["model_id"] = model_id;
  916. overflow_info["stream_id"] = stream_id;
  917. overflow_info["task_id"] = task_id;
  918. overflow_info["task_type"] = task_type;
  919. overflow_info["pc_start"] = IntToHexString(pc_start);
  920. overflow_info["para_base"] = IntToHexString(para_base);
  921. return overflow_info;
  922. }
  923. /*
  924. * Feature group: Dump.
  925. * Target device group: Ascend.
  926. * Runtime category: Old runtime, MindRT.
  927. * Description: This function is for Ascend A+M dump. It parses and dump op overflow info in json file.
  928. */
void E2eDump::DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
                                char *data_ptr) {
  // Each output slot yields one json file named <dump_path>.output.<slot>.json containing the
  // DHA Atomic Add, L2 Atomic Add and AI Core overflow records parsed from data_ptr.
  std::string out_path = dump_path + ".output.";
  std::vector<debugger::dump::OpOutput> op_debug(dump_data.output().begin(), dump_data.output().end());
  for (uint32_t slot = 0; slot < op_debug.size(); slot++) {
    // NOTE(review): index restarts at 0 on every iteration and data_ptr is never advanced per
    // slot, so multiple slots would all parse the same bytes — confirm whether more than one
    // output slot can occur for op-debug data.
    uint32_t index = 0;
    // parse DHA Atomic Add info
    nlohmann::json dha_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kDhaAtomicAddInfoSize;
    // parse L2 Atomic Add info
    nlohmann::json l2_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kL2AtomicAddInfoSize;
    // parse AICore info
    nlohmann::json ai_core_info = ParseOverflowInfo(data_ptr + index);
    index += kAiCoreInfoSize;
    // parse DHA Atomic Add status
    dha_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kDhaAtomicAddStatusSize;
    // parse L2 Atomic Add status
    l2_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kL2AtomicAddStatusSize;
    // parse AICore status: kernel code and block index are followed by the status word.
    uint64_t kernel_code = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t block_idx = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t status = UnpackUint64Value(data_ptr + index);
    ai_core_info["kernel_code"] = IntToHexString(kernel_code);
    ai_core_info["block_idx"] = block_idx;
    ai_core_info["status"] = status;
    nlohmann::json opdebug_data;
    opdebug_data["DHA Atomic Add"] = dha_atomic_add_info;
    opdebug_data["L2 Atomic Add"] = l2_atomic_add_info;
    opdebug_data["AI Core"] = ai_core_info;
    // save json to file
    DumpToFile(out_path + std::to_string(slot) + ".json", opdebug_data.dump());
  }
}
  967. #endif // ENABLE_D
  968. } // namespace mindspore