You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

e2e_dump.cc 44 kB

4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996
  1. /**
  2. * Copyright 2020-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/data_dump/e2e_dump.h"
  17. #include <unistd.h>
  18. #include <sstream>
  19. #include <algorithm>
  20. #include <map>
  21. #include <memory>
  22. #include <set>
  23. #include <utility>
  24. #include <vector>
  25. #include "debug/data_dump/dump_json_parser.h"
  26. #include "runtime/device/ms_device_shape_transfer.h"
  27. #include "debug/anf_ir_utils.h"
  28. #include "debug/common.h"
  29. #include "backend/common/session/anf_runtime_algorithm.h"
  30. #include "include/common/utils/anfalgo.h"
  31. #include "utils/ms_context.h"
  32. #include "runtime/device/kernel_runtime_manager.h"
  33. #include "include/common/utils/config_manager.h"
  34. #include "utils/file_utils.h"
  35. #include "debug/data_dump/tensor_stat_dump.h"
  36. #include "abstract/utils.h"
  37. #include "runtime/hardware/device_context_manager.h"
  38. #ifdef ENABLE_DEBUGGER
  39. #include "debug/debug_services.h"
  40. #include "debug/tensor_load.h"
  41. #include "debug/debugger/debugger.h"
  42. #endif
  43. namespace mindspore {
#ifdef ENABLE_D
// Short aliases for the proto-generated dump enums (Ascend-only build).
using ProtoFormat = debugger::dump::OutputFormat;
using ProtoDataType = debugger::dump::OutputDataType;
// Fixed record sizes, presumably in bytes, for the atomic-add / AiCore
// exception-info sections of the Ascend dump stream. NOTE(review): not used in
// this chunk — confirm the exact meaning against the async dump parser.
constexpr int kDhaAtomicAddInfoSize = 128;
constexpr int kL2AtomicAddInfoSize = 128;
constexpr int kAiCoreInfoSize = 256;
constexpr int kDhaAtomicAddStatusSize = 256;
constexpr int kL2AtomicAddStatusSize = 256;
constexpr int kUint64Size = sizeof(uint64_t);
// {device format, host format} pairs for which device->host layout
// transformation is supported when dumping with trans_flag enabled.
const std::set<std::pair<std::string, std::string>> kSuppTransFormatPair = {
  // {device format, host format}
  {kOpFormat_FRAC_Z, kOpFormat_NCHW},      {kOpFormat_FRAC_NZ, kOpFormat_NCHW},
  {kOpFormat_NC1HWC0, kOpFormat_NCHW},     {kOpFormat_C1HWNCoC0, kOpFormat_NCHW},
  {kOpFormat_NC1HWC0_C04, kOpFormat_NCHW}, {kOpFormat_NDC1HWC0, kOpFormat_NCHW},
  {kOpFormat_FRACTAL_Z_3D, kOpFormat_NCHW}};
// Maps proto tensor-format enums to MindSpore format-name strings.
const std::map<ProtoFormat, std::string> kFormatToStringMap = {
  {ProtoFormat::FORMAT_NCHW, kOpFormat_NCHW},
  {ProtoFormat::FORMAT_NHWC, kOpFormat_NHWC},
  {ProtoFormat::FORMAT_ND, kOpFormat_ND},
  {ProtoFormat::FORMAT_NC1HWC0, kOpFormat_NC1HWC0},
  {ProtoFormat::FORMAT_FRACTAL_Z, kOpFormat_FRAC_Z},
  {ProtoFormat::FORMAT_NC1HWC0_C04, kOpFormat_NC1HWC0_C04},
  {ProtoFormat::FORMAT_FRACTAL_Z_C04, kOpFormat_FRACTAL_Z_C04},
  {ProtoFormat::FORMAT_NC1KHKWHWC0, kOpFormat_NC1KHKWHWC0},
  {ProtoFormat::FORMAT_HWCN, kOpFormat_HWCN},
  {ProtoFormat::FORMAT_NDHWC, kOpFormat_NDHWC},
  {ProtoFormat::FORMAT_NCDHW, kOpFormat_NCDHW},
  {ProtoFormat::FORMAT_DHWCN, kOpFormat_DHWCN},
  {ProtoFormat::FORMAT_DHWNC, kOpFormat_DHWNC},
  {ProtoFormat::FORMAT_NDC1HWC0, kOpFormat_NDC1HWC0},
  {ProtoFormat::FORMAT_FRACTAL_Z_3D, kOpFormat_FRACTAL_Z_3D},
  {ProtoFormat::FORMAT_C1HWNCoC0, kOpFormat_C1HWNCoC0},
  {ProtoFormat::FORMAT_FRACTAL_NZ, kOpFormat_FRAC_NZ},
  {ProtoFormat::FORMAT_FRACTAL_ZN_LSTM, kOpFormat_FRACTAL_ZN_LSTM}};
// Maps proto data-type enums to MindSpore TypeIds.
const std::map<ProtoDataType, mindspore::TypeId> kDataTypetoMSTypeMap = {
  {ProtoDataType::DT_UNDEFINED, mindspore::TypeId::kTypeUnknown},
  {ProtoDataType::DT_FLOAT, mindspore::TypeId::kNumberTypeFloat32},
  {ProtoDataType::DT_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
  {ProtoDataType::DT_INT8, mindspore::TypeId::kNumberTypeInt8},
  {ProtoDataType::DT_UINT8, mindspore::TypeId::kNumberTypeUInt8},
  {ProtoDataType::DT_INT16, mindspore::TypeId::kNumberTypeInt16},
  {ProtoDataType::DT_UINT16, mindspore::TypeId::kNumberTypeUInt16},
  {ProtoDataType::DT_INT32, mindspore::TypeId::kNumberTypeInt32},
  {ProtoDataType::DT_INT64, mindspore::TypeId::kNumberTypeInt64},
  {ProtoDataType::DT_UINT32, mindspore::TypeId::kNumberTypeUInt32},
  {ProtoDataType::DT_UINT64, mindspore::TypeId::kNumberTypeUInt64},
  {ProtoDataType::DT_BOOL, mindspore::TypeId::kNumberTypeBool},
  {ProtoDataType::DT_DOUBLE, mindspore::TypeId::kNumberTypeFloat64},
  {ProtoDataType::DT_STRING, mindspore::TypeId::kObjectTypeString}};
#endif
  94. std::string GenDataFilePath(const CNodePtr &node, const std::string &kernel_name, const std::string &dump_path,
  95. size_t slot, bool is_input) {
  96. std::string op_type = common::AnfAlgo::GetCNodeName(node);
  97. std::string op_name = GetOpNameWithoutScope(kernel_name);
  98. uint64_t timestamp = GetTimeStamp();
  99. uint32_t task_id = 0;
  100. uint32_t stream_id = 0;
  101. std::string tensor_type = is_input ? ".input." : ".output.";
  102. std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
  103. std::to_string(stream_id) + '.' + std::to_string(timestamp) + tensor_type +
  104. std::to_string(slot);
  105. return file_path;
  106. }
  107. bool E2eDump::IsDeviceTargetGPU() {
  108. auto context = MsContext::GetInstance();
  109. MS_EXCEPTION_IF_NULL(context);
  110. return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
  111. }
  112. /*
  113. * Feature group: Dump.
  114. * Target device group: GPU.
  115. * Runtime category: Old runtime, MindRT.
  116. * Description: This function is for dumping tensor in memory to disk in GPU machine.
  117. */
  118. void E2eDump::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
  119. const device::DeviceAddress &addr, const ShapeVector &int_shapes,
  120. const TypeId &host_type, const TypeId &device_type, bool trans_flag, size_t slot,
  121. const Debugger *debugger) {
  122. #ifdef ENABLE_DEBUGGER
  123. auto format = kOpFormat_DEFAULT;
  124. MS_EXCEPTION_IF_NULL(debugger);
  125. auto ret = debugger->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, host_type,
  126. device_type, addr.format(), slot);
  127. if (!ret) {
  128. MS_LOG(INFO) << "DumpTensorToFile Failed: flag:" << trans_flag << ", path:" << file_path
  129. << ", host_format:" << format;
  130. }
  131. #endif
  132. }
  133. void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  134. MS_EXCEPTION_IF_NULL(graph);
  135. auto &dump_json_parser = DumpJsonParser::GetInstance();
  136. if (!dump_json_parser.OutputNeedDump()) {
  137. return;
  138. }
  139. MS_LOG(INFO) << "Start e2e dump output";
  140. bool trans_flag = dump_json_parser.trans_flag();
  141. const auto &apply_kernels = graph->execution_order();
  142. for (const auto &node : apply_kernels) {
  143. MS_EXCEPTION_IF_NULL(node);
  144. std::string kernel_name = GetKernelNodeName(node);
  145. if (!dump_json_parser.NeedDump(kernel_name)) {
  146. continue;
  147. }
  148. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  149. DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  150. }
  151. }
  152. void E2eDump::DumpOutputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  153. auto &dump_json_parser = DumpJsonParser::GetInstance();
  154. if (!dump_json_parser.OutputNeedDump()) {
  155. return;
  156. }
  157. bool trans_flag = dump_json_parser.trans_flag();
  158. MS_EXCEPTION_IF_NULL(node);
  159. std::string kernel_name = GetKernelNodeName(node);
  160. if (!dump_json_parser.NeedDump(kernel_name)) {
  161. return;
  162. }
  163. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  164. DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  165. }
  166. void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
  167. std::string *kernel_name, const Debugger *debugger) {
  168. MS_EXCEPTION_IF_NULL(node);
  169. GetFileKernelName(NOT_NULL(kernel_name));
  170. auto output_size = common::AnfAlgo::GetOutputTensorNum(node);
  171. for (size_t j = 0; j < output_size; ++j) {
  172. if (!AnfAlgo::OutputAddrExist(node, j)) {
  173. continue;
  174. }
  175. auto addr = AnfAlgo::GetOutputAddr(node, j);
  176. MS_EXCEPTION_IF_NULL(addr);
  177. ShapeVector int_shapes;
  178. GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
  179. auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
  180. auto device_type = AnfAlgo::GetOutputDeviceDataType(node, j);
  181. std::string op_type = common::AnfAlgo::GetCNodeName(node);
  182. std::string op_name = GetOpNameWithoutScope(*kernel_name);
  183. uint32_t task_id = 0;
  184. uint32_t stream_id = 0;
  185. uint64_t timestamp = GetTimeStamp();
  186. std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
  187. std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
  188. std::to_string(j);
  189. if (DumpJsonParser::GetInstance().IsStatisticDump() &&
  190. (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
  191. TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, false, j, j);
  192. (void)stat_dump.DumpTensorStatsToFile(GetKernelNodeName(node), dump_path, debugger);
  193. }
  194. if (DumpJsonParser::GetInstance().IsTensorDump()) {
  195. if (IsDeviceTargetGPU()) {
  196. DumpGPUMemToFile(file_path, GetKernelNodeName(node), *addr, int_shapes, type, device_type, trans_flag, j,
  197. debugger);
  198. } else {
  199. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  200. }
  201. }
  202. }
  203. }
  204. void E2eDump::DumpOutputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
  205. std::string *kernel_name) {
  206. auto debugger = Debugger::GetInstance();
  207. MS_EXCEPTION_IF_NULL(debugger);
  208. if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
  209. MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
  210. return;
  211. }
  212. MS_EXCEPTION_IF_NULL(node);
  213. GetFileKernelName(NOT_NULL(kernel_name));
  214. auto output_size = common::AnfAlgo::GetOutputTensorNum(node);
  215. for (size_t j = 0; j < output_size; ++j) {
  216. if (!AnfAlgo::OutputAddrExist(node, j)) {
  217. continue;
  218. }
  219. auto addr = AnfAlgo::GetOutputAddr(node, j);
  220. MS_EXCEPTION_IF_NULL(addr);
  221. ShapeVector int_shapes;
  222. GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
  223. auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
  224. std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, false);
  225. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  226. }
  227. }
  228. void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  229. MS_EXCEPTION_IF_NULL(graph);
  230. auto &dump_json_parser = DumpJsonParser::GetInstance();
  231. if (!dump_json_parser.InputNeedDump()) {
  232. return;
  233. }
  234. MS_LOG(INFO) << "Start e2e dump input";
  235. bool trans_flag = dump_json_parser.trans_flag();
  236. const auto &apply_kernels = graph->execution_order();
  237. for (const auto &node : apply_kernels) {
  238. MS_EXCEPTION_IF_NULL(node);
  239. std::string kernel_name = GetKernelNodeName(node);
  240. if (!dump_json_parser.NeedDump(kernel_name)) {
  241. continue;
  242. }
  243. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  244. DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  245. }
  246. }
  247. void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger,
  248. const KernelLaunchInfo *launch_info) {
  249. auto &dump_json_parser = DumpJsonParser::GetInstance();
  250. if (!dump_json_parser.InputNeedDump()) {
  251. return;
  252. }
  253. bool trans_flag = dump_json_parser.trans_flag();
  254. MS_EXCEPTION_IF_NULL(node);
  255. std::string kernel_name = GetKernelNodeName(node);
  256. if (!dump_json_parser.NeedDump(kernel_name)) {
  257. return;
  258. }
  259. DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  260. DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger, launch_info);
  261. }
  262. std::shared_ptr<device::DeviceAddress> CreateAscendDeviceAddress(const KernelLaunchInfo *launch_info, size_t index,
  263. TypeId type) {
  264. MS_EXCEPTION_IF_NULL(launch_info);
  265. auto addr_ptr = launch_info->inputs_[index];
  266. auto ms_context = MsContext::GetInstance();
  267. MS_EXCEPTION_IF_NULL(ms_context);
  268. auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
  269. auto device_context =
  270. device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({kAscendDevice, device_id});
  271. auto format = kOpFormat_DEFAULT;
  272. MS_EXCEPTION_IF_NULL(addr_ptr);
  273. return device_context->CreateDeviceAddress(addr_ptr->addr, addr_ptr->size, format, type);
  274. }
// Worker for input dumping: for every input of `node`, locates the producing
// node's output, then writes tensor statistics and/or tensor contents as
// configured. `launch_info` is only consulted in Ascend kernel-by-kernel mode.
void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name, const Debugger *debugger, const KernelLaunchInfo *launch_info) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    // Each input of `node` is the output of some producer node; dump that output.
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;  // producer output never materialized; nothing to dump
    }
    std::string tensor_name = GetKernelNodeName(node);
    size_t slot = j;
    if (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
      // In kernel-by-kernel modes the tensor is addressed by the *producer's*
      // name and slot 0, instead of the consumer's name and input index.
      auto input_kernel = node->input(j + 1);
      std::string input_kernel_name = GetKernelNodeName(input_kernel);
      tensor_name = input_kernel_name;
      slot = 0;
    }
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    auto device_type = AnfAlgo::GetOutputDeviceDataType(input, index);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = GetOpNameWithoutScope(*kernel_name);
    uint64_t timestamp = GetTimeStamp();
    // Task and stream ids are fixed at 0 in the e2e dump flow.
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    // File name: <op_type>.<op_name>.<task>.<stream>.<timestamp>.input.<j>
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, true, j, slot);
      (void)stat_dump.DumpTensorStatsToFile(tensor_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsDeviceTargetGPU()) {
        DumpGPUMemToFile(file_path, tensor_name, *addr, int_shapes, type, device_type, trans_flag, slot, debugger);
      } else if (Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
        // load address from launch_info when it's Ascend Kernel by kernel mode.
        auto ascend_device_addr = CreateAscendDeviceAddress(launch_info, j, type);
        DumpMemToFile(file_path, *ascend_device_addr, int_shapes, type, trans_flag);
      } else {
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}
  326. void E2eDump::DumpInputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
  327. std::string *kernel_name) {
  328. auto debugger = Debugger::GetInstance();
  329. MS_EXCEPTION_IF_NULL(debugger);
  330. if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
  331. MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
  332. return;
  333. }
  334. MS_EXCEPTION_IF_NULL(node);
  335. GetFileKernelName(NOT_NULL(kernel_name));
  336. auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  337. for (size_t j = 0; j < input_size; ++j) {
  338. auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
  339. auto input = kernel_with_index.first;
  340. auto index = kernel_with_index.second;
  341. if (!AnfAlgo::OutputAddrExist(input, index)) {
  342. continue;
  343. }
  344. auto addr = AnfAlgo::GetOutputAddr(input, index);
  345. MS_EXCEPTION_IF_NULL(addr);
  346. ShapeVector int_shapes;
  347. GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
  348. auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
  349. std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, true);
  350. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  351. }
  352. }
// Dumps one Parameter or ValueNode (weight or constant) at `output_index` into
// `dump_path` as "Parameter.<name>...output.0". StringImm value nodes and all
// other node kinds are skipped.
void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                bool trans_flag, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  // Only parameters and (non-string) value nodes are dumpable here.
  if ((!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) || IsValueNode<StringImm>(anf_node)) {
    return;
  }
  std::string node_name = GetKernelNodeName(anf_node);
  if (!dump_json_parser.NeedDump(node_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  std::string dump_name = node_name;
  const std::string cst_prefix = "Default--";
  if (anf_node->isa<ValueNode>()) {
    // Constants are expected to be named "Default--<name>"; strip the prefix.
    if (dump_name.find(cst_prefix) == std::string::npos) {
      MS_LOG(INFO) << "Incorrect constant format: " << dump_name;
      return;
    }
    dump_name = node_name.substr(cst_prefix.length());
    // Constants are dumped in device layout; no host-format transformation.
    trans_flag = false;
  }
  // check if output address exists, if not, return;
  if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
    return;
  }
  auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
  MS_EXCEPTION_IF_NULL(addr);
  ShapeVector int_shapes;
  GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
  auto type = common::AnfAlgo::GetOutputInferDataType(anf_node, output_index);
  auto device_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
  uint64_t timestamp = GetTimeStamp();
  // Task and stream ids are fixed at 0 in the e2e dump flow.
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + dump_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpGPUMemToFile(file_path, node_name, *addr, int_shapes, type, device_type, trans_flag, 0, debugger);
    }
  } else {
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}
  403. /*
  404. * Feature group: Dump.
  405. * Target device group: Ascend, GPU.
  406. * Runtime category: MindRT.
  407. * Description: This function is similar to DumpSingleAnfNode function but it is only for dumping parameters in mindRT.
  408. * This function uses GetParameterInfo to get dump info for the parameter node.
  409. */
  410. void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag,
  411. const Debugger *debugger) {
  412. MS_EXCEPTION_IF_NULL(anf_node);
  413. auto &dump_json_parser = DumpJsonParser::GetInstance();
  414. std::string node_name = GetKernelNodeName(anf_node);
  415. if (!anf_node->isa<Parameter>() || !dump_json_parser.NeedDump(node_name) || !dump_json_parser.OutputNeedDump()) {
  416. return;
  417. }
  418. DumpJsonParser::GetInstance().MatchKernel(node_name);
  419. GetFileKernelName(NOT_NULL(&node_name));
  420. ShapeVector int_shapes;
  421. TypeId type;
  422. TypeId device_type;
  423. auto addr = GetParameterInfo(anf_node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type));
  424. if (addr == nullptr) {
  425. MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for mindRT.";
  426. return;
  427. }
  428. uint64_t timestamp = GetTimeStamp();
  429. uint32_t task_id = 0;
  430. uint32_t stream_id = 0;
  431. std::string file_path = dump_path + "/Parameter." + node_name + '.' + std::to_string(task_id) + '.' +
  432. std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  433. if (IsDeviceTargetGPU()) {
  434. if (dump_json_parser.IsStatisticDump()) {
  435. TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
  436. (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
  437. }
  438. if (dump_json_parser.IsTensorDump()) {
  439. DumpGPUMemToFile(file_path, node_name, *addr, int_shapes, type, device_type, trans_flag, 0, debugger);
  440. }
  441. } else {
  442. DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  443. }
  444. }
  445. void E2eDump::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path,
  446. const Debugger *debugger) {
  447. MS_EXCEPTION_IF_NULL(graph);
  448. auto &dump_json_parser = DumpJsonParser::GetInstance();
  449. if (!dump_json_parser.OutputNeedDump()) {
  450. return;
  451. }
  452. MS_LOG(INFO) << "Start e2e dump parameters";
  453. bool trans_flag = dump_json_parser.trans_flag();
  454. // dump parameters
  455. const auto &parameters = graph->inputs();
  456. for (auto &item : parameters) {
  457. DumpSingleAnfNode(item, PARAMETER_OUTPUT_INDEX, dump_path, trans_flag, debugger);
  458. }
  459. }
  460. void E2eDump::DumpConstantData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  461. MS_EXCEPTION_IF_NULL(graph);
  462. auto &dump_json_parser = DumpJsonParser::GetInstance();
  463. if (!IsDeviceTargetGPU() || !dump_json_parser.e2e_dump_enabled()) {
  464. return;
  465. }
  466. uint32_t graph_id = graph->graph_id();
  467. std::string cst_path = GenerateDumpPath(graph_id, rank_id, true);
  468. if (!Common::FileExists(cst_path)) {
  469. DumpConstantData(graph, cst_path, debugger);
  470. }
  471. }
  472. void E2eDump::DumpConstantData(const session::KernelGraph *graph, const std::string &cst_dump_path,
  473. const Debugger *debugger) {
  474. // Dump constant to npy file
  475. MS_EXCEPTION_IF_NULL(graph);
  476. auto &dump_json_parser = DumpJsonParser::GetInstance();
  477. MS_LOG(INFO) << "DumpConstants. Current iteration is " << dump_json_parser.cur_dump_iter();
  478. MS_LOG(INFO) << "Current graph id is " << graph->graph_id();
  479. if (!dump_json_parser.OutputNeedDump()) {
  480. return;
  481. }
  482. const auto value_nodes = graph->graph_value_nodes();
  483. for (auto &item : value_nodes) {
  484. DumpSingleAnfNode(item, VALUE_NODE_OUTPUT_INDEX, cst_dump_path, false, debugger);
  485. }
  486. }
  487. /*
  488. * Feature group: Dump.
  489. * Target device group: Ascend, GPU.
  490. * Runtime category: Old runtime.
  491. * Description: This function is for updating dump iteration for GPU and ascend old runtime.
  492. */
  493. void E2eDump::UpdateIterOldRTDump(const session::KernelGraph *graph) {
  494. MS_EXCEPTION_IF_NULL(graph);
  495. auto &dump_json_parser = DumpJsonParser::GetInstance();
  496. uint32_t graph_id = graph->graph_id();
  497. if (IsDeviceTargetGPU()) {
  498. if (starting_graph_id == INT32_MAX) {
  499. starting_graph_id = graph_id;
  500. } else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
  501. // Update dump iter for mindrt runtime is done using UpdateIterGPUDump().
  502. // Update dump iter for GPU old runtime.
  503. dump_json_parser.UpdateDumpIter();
  504. }
  505. return;
  506. }
  507. // If device target is Ascend
  508. if (graph->IsDatasetGraph()) {
  509. MS_LOG(INFO) << "No need to update iteration for dataset graph.";
  510. return;
  511. }
  512. // In multi network scripts, dump iter is equal to the number of networks that have been executed so far.
  513. dump_json_parser.UpdateDumpIter();
  514. }
  515. /*
  516. * Feature group: Dump.
  517. * Target device group: Ascend, GPU.
  518. * Runtime category: MindRT.
  519. * Description: This function is for updating dump iteration for GPU and ascend MindRT dump. Please note that dump with
  520. * dataset_sink_mode = True is not supported for GPU.
  521. */
  522. void E2eDump::UpdateIterMindRTDump() {
  523. auto debugger = Debugger::GetInstance();
  524. // Dataset graph is always the first graph in the list when dataset_sink_mode is true.
  525. auto graph = (debugger->GetStepGraphPtrList())[0];
  526. auto context = MsContext::GetInstance();
  527. MS_EXCEPTION_IF_NULL(context);
  528. if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice && graph->IsDatasetGraph()) {
  529. MS_LOG(INFO) << "No need to update iteration for dataset graph.";
  530. return;
  531. }
  532. // update dump iter for GPU and kernel by kernel ascend dump.
  533. DumpJsonParser::GetInstance().UpdateDumpIter();
  534. }
/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Generates graph history files (dumping all the iteration numbers in which the graph was executed) for
 * the given graph and rank_id. If dataset_sink_mode is true for async dump in ascend, this function is called once per
 * each epoch and dumps all the iterations in the epoch to the graph history file.
 */
void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
  auto &json_parser = DumpJsonParser::GetInstance();
  if (!(json_parser.async_dump_enabled() || json_parser.e2e_dump_enabled())) {
    return;  // no dump mode active; nothing to record
  }
  bool sink_mode = (ConfigManager::GetInstance().dataset_mode() || graph->IsDatasetGraph());
  auto iter_num = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "graph: " << graph->graph_id() << " is dataset graph, not creating graph history file.";
    return;
  }
  if (!IsDeviceTargetGPU() && (graph->graph_id() != graph->root_graph_id())) {
    // when device target is ascend, we only dump graph run iter for the root graph.
    return;
  }
  // History file lives under <dump>/rank_<id>/execution_order/, keyed by graph id
  // on GPU and by root graph id on Ascend.
  std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/";
  std::string graph_str =
    IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
  std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv";
  auto real_path = Common::CreatePrefixPath(file_name_to_check);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Check file path: " << file_name_to_check << " failed.";
    return;
  }
  std::string file_name = real_path.value();
  // Temporarily make the file owner-writable, append, then restore to read-only.
  ChangeFileMode(file_name, S_IWUSR);
  std::ofstream fout(file_name, std::ofstream::app);
  if (!fout.is_open()) {
    MS_LOG(WARNING) << "Open file for saving graph global execution order failed.";
    return;
  }
  if (sink_mode && json_parser.async_dump_enabled() && !Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
    // for async dump when sink_mode = true, cur_dump_iter() = current_epoch
    // dump history for all iterations in the epoch
    Debugger::GetInstance()->UpdateGraphIterMap(graph->graph_id(), iter_num);
    auto graph_iter_map = Debugger::GetInstance()->GetGraphIterMap();
    auto step_per_epoch = IntToSize(graph_iter_map[graph->graph_id()]);
    for (size_t i = 0; i < step_per_epoch; i++) {
      auto step = (json_parser.cur_dump_iter() * step_per_epoch) + i;
      fout << (std::to_string(step) + "\n");
    }
  } else {
    // Non-sink (or e2e) mode: record just the current iteration number.
    fout << std::to_string(json_parser.cur_dump_iter()) + "\n";
  }
  fout.close();
  ChangeFileMode(file_name, S_IRUSR);
}
  590. /*
  591. * Feature group: Dump.
  592. * Target device group: Ascend, GPU.
  593. * Runtime category: Old runtime, MindRT.
  594. * Description: This function is for dumping the whole graph. It is used for old runtime in GPU and Ascend and
  595. * super-kernel mindRT in Ascend.
  596. */
  597. void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  598. MS_EXCEPTION_IF_NULL(graph);
  599. bool success = false;
  600. auto &dump_json_parser = DumpJsonParser::GetInstance();
  601. uint32_t graph_id = graph->graph_id();
  602. if (!dump_json_parser.e2e_dump_enabled()) {
  603. return;
  604. }
  605. if (dump_json_parser.GetIterDumpFlag()) {
  606. MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
  607. MS_LOG(INFO) << "Current graph id is " << graph_id;
  608. std::string dump_path = GenerateDumpPath(graph_id, rank_id);
  609. if (dump_json_parser.IsStatisticDump()) {
  610. (void)TensorStatDump::OpenStatisticsFile(dump_path);
  611. }
  612. DumpInput(graph, dump_path, debugger);
  613. DumpOutput(graph, dump_path, debugger);
  614. if (!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
  615. // Dump parameters for old runtime. For mindRT it is done in PostExecuteGraphDebugger.
  616. DumpParameters(graph, dump_path, debugger);
  617. // DumpConstantData for GPU old runtime.
  618. DumpConstantData(graph, rank_id, debugger);
  619. }
  620. if (dump_json_parser.IsStatisticDump()) {
  621. CsvWriter::GetInstance().CloseFile();
  622. }
  623. success = true;
  624. }
  625. if (success) {
  626. MS_LOG(DEBUG) << "E2eDump Dump Data completed!";
  627. } else {
  628. MS_LOG(DEBUG) << "E2eDump Dump has not occurred!";
  629. }
  630. }
  631. /*
  632. * Feature group: Dump.
  633. * Target device group: Ascend, GPU.
  634. * Runtime category: MindRT.
  635. * Description: This function is for dumping a single node. It is used for mindrt in GPU and Ascend kernel-by-kernel.
  636. */
  637. bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger,
  638. const KernelLaunchInfo *launch_info) {
  639. bool success = false;
  640. auto &dump_json_parser = DumpJsonParser::GetInstance();
  641. if (dump_json_parser.DumpEnabledForIter()) {
  642. std::string dump_path = GenerateDumpPath(graph_id, rank_id);
  643. DumpInputSingleNode(node, dump_path, debugger, launch_info);
  644. DumpOutputSingleNode(node, dump_path, debugger);
  645. success = true;
  646. }
  647. return success;
  648. }
  649. /*
  650. * Feature group: Dump.
  651. * Target device group: Ascend, GPU.
  652. * Runtime category: MindRT.
  653. * Description: This function is for dumping all the parameters in the current root graph for GPU, Ascend superkernel
  654. * (e2e dump) and Ascend kernel-by-kernel (e2e and async dump).
  655. */
  656. void E2eDump::DumpParametersData(uint32_t rank_id, const Debugger *debugger) {
  657. uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
  658. auto &dump_json_parser = DumpJsonParser::GetInstance();
  659. if (dump_json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) {
  660. // Dump parameters for mindRT in async dump only for kernel by kernel mode.
  661. return;
  662. }
  663. if (dump_json_parser.DumpEnabledForIter()) {
  664. MS_LOG(INFO) << "DumpParameters. Current iteration is " << dump_json_parser.cur_dump_iter();
  665. MS_LOG(INFO) << "Current root graph id is " << root_graph_id;
  666. std::string dump_path = GenerateDumpPath(root_graph_id, rank_id);
  667. bool trans_flag = dump_json_parser.trans_flag();
  668. for (auto &item : debugger->GetParametersMindRT()) {
  669. DumpSingleParameterNode(item, dump_path, trans_flag, debugger);
  670. }
  671. }
  672. }
  673. #ifdef ENABLE_D
  674. template <typename T>
  675. dump_data_t ParseAttrsFromDumpData(const std::string &dump_path, char *data_ptr, const T &tensor, const std::string &io,
  676. uint32_t slot) {
  677. // get data type
  678. auto iter_dtype = kDataTypetoMSTypeMap.find(tensor.data_type());
  679. if (iter_dtype == kDataTypetoMSTypeMap.end()) {
  680. MS_LOG(INFO) << "Unsupported data type for tensor " << dump_path << ": unknown(" << tensor.data_type() << ")";
  681. return dump_data_t{};
  682. }
  683. auto data_type = iter_dtype->second;
  684. // get format
  685. auto iter_fmt = kFormatToStringMap.find(tensor.format());
  686. if (iter_fmt == kFormatToStringMap.end()) {
  687. MS_LOG(INFO) << "Unsupported tensor format for tensor " << dump_path << ": unknown(" << tensor.format() << ")";
  688. return dump_data_t{};
  689. }
  690. std::string device_format = iter_fmt->second;
  691. // get shape
  692. ShapeVector shape_d;
  693. (void)std::transform(tensor.shape().dim().begin(), tensor.shape().dim().end(), std::back_inserter(shape_d),
  694. SizeToLong);
  695. ShapeVector shape_to;
  696. (void)std::transform(tensor.original_shape().dim().begin(), tensor.original_shape().dim().end(),
  697. std::back_inserter(shape_to), SizeToLong);
  698. // get size and sub_format
  699. size_t data_size = (size_t)tensor.size();
  700. int32_t sub_format = tensor.sub_format();
  701. return dump_data_t{dump_path, data_ptr, data_type, device_format, shape_d, shape_to, data_size, sub_format, io, slot};
  702. }
  703. /*
  704. * Feature group: Dump.
  705. * Target device group: Ascend.
  706. * Runtime category: Old runtime, MindRT.
  707. * Description: This function is for ascend A+M dump only. It parses and converts each slot of tensor in DumpData object
  708. * and dump the tensor data in npy file or statistic data in csv file.
  709. */
  710. void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
  711. char *data_ptr) {
  712. std::vector<dump_data_t> dump_tensor_vec;
  713. // dump input tensors
  714. std::vector<debugger::dump::OpInput> input_tensors(dump_data.input().begin(), dump_data.input().end());
  715. uint64_t offset = 0;
  716. for (uint32_t slot = 0; slot < input_tensors.size(); slot++) {
  717. auto in_tensor = input_tensors[slot];
  718. dump_tensor_vec.push_back(ParseAttrsFromDumpData(dump_path, data_ptr + offset, in_tensor, "input", slot));
  719. offset += in_tensor.size();
  720. }
  721. // dump output tensors
  722. std::vector<debugger::dump::OpOutput> output_tensors(dump_data.output().begin(), dump_data.output().end());
  723. for (uint32_t slot = 0; slot < output_tensors.size(); slot++) {
  724. auto out_tensor = output_tensors[slot];
  725. dump_tensor_vec.push_back(ParseAttrsFromDumpData(dump_path, data_ptr + offset, out_tensor, "output", slot));
  726. offset += out_tensor.size();
  727. }
  728. // assign slot conversion task to different thread.
  729. if (dump_tensor_vec.empty()) {
  730. return;
  731. }
  732. auto default_num_workers = std::max<uint32_t>(1, std::thread::hardware_concurrency() / 4);
  733. auto num_threads = std::min<uint32_t>(default_num_workers, dump_tensor_vec.size());
  734. uint32_t task_size = dump_tensor_vec.size() / num_threads;
  735. uint32_t remainder = dump_tensor_vec.size() % num_threads;
  736. std::vector<std::thread> threads;
  737. threads.reserve(num_threads);
  738. MS_LOG(INFO) << "Number of threads used for A+M dump: " << num_threads;
  739. for (size_t t = 0; t < threads.capacity(); t++) {
  740. uint32_t start_idx = t * task_size;
  741. uint32_t end_idx = start_idx + task_size - 1;
  742. if (t == num_threads - 1) {
  743. end_idx += remainder;
  744. }
  745. threads.emplace_back(std::thread(&E2eDump::ConvertFormatForTensors, std::ref(dump_tensor_vec), start_idx, end_idx));
  746. }
  747. for (size_t t = 0; t < threads.capacity(); t++) {
  748. threads[t].join();
  749. }
  750. }
  751. void E2eDump::ConvertFormatForTensors(const std::vector<dump_data_t> &dump_tensor_vec, uint32_t start_idx,
  752. uint32_t end_idx) {
  753. for (uint32_t idx = start_idx; idx <= end_idx; idx++) {
  754. auto succ = ConvertFormatForTensorAndDump(dump_tensor_vec[idx]);
  755. if (!succ) {
  756. MS_LOG(INFO) << "Failed to convert format for tensor " << dump_tensor_vec[idx].dump_file_path << "."
  757. << dump_tensor_vec[idx].in_out_str << "." << dump_tensor_vec[idx].slot;
  758. }
  759. }
  760. }
  761. /*
  762. * Feature group: Dump.
  763. * Target device group: Ascend.
  764. * Runtime category: Old runtime, MindRT.
  765. * Description: It serves for A+M dump. Save statistic of the tensor data into dump path as configured.
  766. */
  767. bool DumpTensorStatsIfNeeded(const dump_data_t &dump_tensor_info, char *data_ptr) {
  768. // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp
  769. if (!DumpJsonParser::GetInstance().IsStatisticDump()) {
  770. return true;
  771. }
  772. std::string dump_path = dump_tensor_info.dump_file_path;
  773. size_t pos = dump_path.rfind("/");
  774. std::string file_name = dump_path.substr(pos + 1);
  775. size_t first_dot = file_name.find(".");
  776. size_t fourth_dot = file_name.rfind(".");
  777. size_t third_dot = file_name.rfind(".", fourth_dot - 1);
  778. size_t second_dot = file_name.rfind(".", third_dot - 1);
  779. if (first_dot == std::string::npos || second_dot == std::string::npos || third_dot == std::string::npos ||
  780. first_dot == second_dot) {
  781. MS_LOG(ERROR) << "Dump path " << dump_path << " received is not well formed";
  782. return false;
  783. }
  784. std::string op_type = file_name.substr(0, first_dot);
  785. std::string op_name = file_name.substr(first_dot + 1, second_dot - first_dot - 1);
  786. std::string task_id = file_name.substr(second_dot + 1, third_dot - second_dot - 1);
  787. std::string stream_id = file_name.substr(third_dot + 1, fourth_dot - third_dot - 1);
  788. std::string timestamp = file_name.substr(fourth_dot + 1);
  789. TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, dump_tensor_info.in_out_str,
  790. dump_tensor_info.slot, dump_tensor_info.slot);
  791. std::shared_ptr<TensorData> data = std::make_shared<TensorData>();
  792. if (dump_tensor_info.data_type <= TypeId::kNumberTypeBegin ||
  793. dump_tensor_info.data_type >= TypeId::kNumberTypeComplex64) {
  794. MS_LOG(ERROR) << "Data type of operator " << file_name << " is not supported by statistic dump";
  795. return false;
  796. }
  797. data->SetType(dump_tensor_info.data_type);
  798. data->SetByteSize(dump_tensor_info.data_size);
  799. data->SetShape(dump_tensor_info.host_shape);
  800. data->SetDataPtr(data_ptr);
  801. return stat_dump.DumpTensorStatsToFile(dump_path.substr(0, pos), data);
  802. }
  803. /*
  804. * Feature group: Dump.
  805. * Target device group: Ascend.
  806. * Runtime category: Old runtime, MindRT.
  807. * Description: It serves for A+M dump. Parse each attributes in Dumpdata proto object from device format to mindspore
  808. * supported format and save tensor data or statistic as configured.
  809. */
  810. bool E2eDump::ConvertFormatForTensorAndDump(const dump_data_t &dump_tensor_info) {
  811. // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp
  812. std::ostringstream dump_path_ss;
  813. dump_path_ss << dump_tensor_info.dump_file_path << "." << dump_tensor_info.in_out_str << "." << dump_tensor_info.slot
  814. << ".";
  815. std::string dump_path_slot = dump_path_ss.str();
  816. bool trans_success = false;
  817. auto trans_buf = std::vector<uint8_t>(dump_tensor_info.data_size);
  818. // convert format to host format. It can be either NCHW or ND (non 4-dimemsions).
  819. const uint8_t kNumFourDim = 4;
  820. std::string host_format;
  821. std::string device_format = dump_tensor_info.format;
  822. if (dump_tensor_info.host_shape.size() == kNumFourDim) {
  823. host_format = kOpFormat_NCHW;
  824. } else {
  825. host_format = kOpFormat_ND;
  826. }
  827. if (device_format != host_format) {
  828. auto iter = kSuppTransFormatPair.find(std::make_pair(device_format, host_format));
  829. if (iter == kSuppTransFormatPair.end()) {
  830. MS_LOG(INFO) << "Do not support convert from format " << device_format << " to " << host_format << " for tensor "
  831. << dump_path_slot;
  832. } else {
  833. const trans::FormatArgs format_args{dump_tensor_info.data_ptr,
  834. dump_tensor_info.data_size,
  835. host_format,
  836. device_format,
  837. dump_tensor_info.host_shape,
  838. dump_tensor_info.device_shape,
  839. dump_tensor_info.data_type};
  840. auto group = dump_tensor_info.sub_format > 1 ? dump_tensor_info.sub_format : 1;
  841. trans_success = trans::TransFormatFromDeviceToHost(format_args, trans_buf.data(), group);
  842. if (!trans_success) {
  843. MS_LOG(ERROR) << "Trans format failed.";
  844. }
  845. }
  846. }
  847. // dump tensor data into npy file
  848. bool dump_success = true;
  849. if (trans_success) {
  850. dump_success = DumpTensorStatsIfNeeded(dump_tensor_info, reinterpret_cast<char *>(trans_buf.data()));
  851. if (DumpJsonParser::GetInstance().IsTensorDump()) {
  852. dump_path_slot += host_format;
  853. dump_success = DumpJsonParser::DumpToFile(dump_path_slot, trans_buf.data(), dump_tensor_info.data_size,
  854. dump_tensor_info.host_shape, dump_tensor_info.data_type) &&
  855. dump_success;
  856. }
  857. } else {
  858. dump_success = DumpTensorStatsIfNeeded(dump_tensor_info, dump_tensor_info.data_ptr);
  859. if (DumpJsonParser::GetInstance().IsTensorDump()) {
  860. dump_path_slot += device_format;
  861. dump_success = DumpJsonParser::DumpToFile(dump_path_slot, dump_tensor_info.data_ptr, dump_tensor_info.data_size,
  862. dump_tensor_info.host_shape, dump_tensor_info.data_type) &&
  863. dump_success;
  864. }
  865. }
  866. return dump_success;
  867. }
  868. uint64_t UnpackUint64Value(char *ptr) {
  869. #if defined(__APPLE__)
  870. return *reinterpret_cast<const uint64_t *>(ptr);
  871. #else
  872. return le64toh(*reinterpret_cast<const uint64_t *>(ptr));
  873. #endif
  874. }
  875. std::string IntToHexString(const uint64_t value) {
  876. std::stringstream ss;
  877. ss << "0x" << std::hex << value;
  878. return ss.str();
  879. }
  880. nlohmann::json E2eDump::ParseOverflowInfo(char *data_ptr) {
  881. uint32_t index = 0;
  882. uint64_t model_id = UnpackUint64Value(data_ptr);
  883. index += kUint64Size;
  884. uint64_t stream_id = UnpackUint64Value(data_ptr + index);
  885. index += kUint64Size;
  886. uint64_t task_id = UnpackUint64Value(data_ptr + index);
  887. index += kUint64Size;
  888. uint64_t task_type = UnpackUint64Value(data_ptr + index);
  889. index += kUint64Size;
  890. uint64_t pc_start = UnpackUint64Value(data_ptr + index);
  891. index += kUint64Size;
  892. uint64_t para_base = UnpackUint64Value(data_ptr + index);
  893. nlohmann::json overflow_info;
  894. overflow_info["model_id"] = model_id;
  895. overflow_info["stream_id"] = stream_id;
  896. overflow_info["task_id"] = task_id;
  897. overflow_info["task_type"] = task_type;
  898. overflow_info["pc_start"] = IntToHexString(pc_start);
  899. overflow_info["para_base"] = IntToHexString(para_base);
  900. return overflow_info;
  901. }
  902. /*
  903. * Feature group: Dump.
  904. * Target device group: Ascend.
  905. * Runtime category: Old runtime, MindRT.
  906. * Description: This function is for Ascend A+M dump. It parses and dump op overflow info in json file.
  907. */
  908. void E2eDump::DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
  909. char *data_ptr) {
  910. std::string out_path = dump_path + ".output.";
  911. std::vector<debugger::dump::OpOutput> op_debug(dump_data.output().begin(), dump_data.output().end());
  912. for (uint32_t slot = 0; slot < op_debug.size(); slot++) {
  913. uint32_t index = 0;
  914. // parse DHA Atomic Add info
  915. nlohmann::json dha_atomic_add_info = ParseOverflowInfo(data_ptr + index);
  916. index += kDhaAtomicAddInfoSize;
  917. // parse L2 Atomic Add info
  918. nlohmann::json l2_atomic_add_info = ParseOverflowInfo(data_ptr + index);
  919. index += kL2AtomicAddInfoSize;
  920. // parse AICore info
  921. nlohmann::json ai_core_info = ParseOverflowInfo(data_ptr + index);
  922. index += kAiCoreInfoSize;
  923. // parse DHA Atomic Add status
  924. dha_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
  925. index += kDhaAtomicAddStatusSize;
  926. // parse L2 Atomic Add status
  927. l2_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
  928. index += kL2AtomicAddStatusSize;
  929. // parse AICore status
  930. uint64_t kernel_code = UnpackUint64Value(data_ptr + index);
  931. index += kUint64Size;
  932. uint64_t block_idx = UnpackUint64Value(data_ptr + index);
  933. index += kUint64Size;
  934. uint64_t status = UnpackUint64Value(data_ptr + index);
  935. ai_core_info["kernel_code"] = IntToHexString(kernel_code);
  936. ai_core_info["block_idx"] = block_idx;
  937. ai_core_info["status"] = status;
  938. nlohmann::json opdebug_data;
  939. opdebug_data["DHA Atomic Add"] = dha_atomic_add_info;
  940. opdebug_data["L2 Atomic Add"] = l2_atomic_add_info;
  941. opdebug_data["AI Core"] = ai_core_info;
  942. // save json to file
  943. DumpToFile(out_path + std::to_string(slot) + ".json", opdebug_data.dump());
  944. }
  945. }
  946. #endif // ENABLE_D
  947. } // namespace mindspore