| @@ -445,6 +445,9 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap | |||
| if (debugger_) { | |||
| debugger_->PreExecute(kernel_graph, graph_sum_); | |||
| } | |||
| DumpSetup(kernel_graph); | |||
| #if ENABLE_CPU && ENABLE_GPU | |||
| // Initialize parameter server | |||
| InitPSParamAndOptim(kernel_graph, inputs); | |||
| @@ -459,13 +462,12 @@ void GPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra | |||
| if (context_ptr->get_param<bool>(MS_CTX_ENABLE_GPU_SUMMARY)) { | |||
| Summary(kernel_graph.get()); | |||
| } | |||
| bool dump_enabled = DumpDataEnabledIteration(); | |||
| // debug used for dump | |||
| if (debugger_ && dump_enabled) { | |||
| if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) { | |||
| Dump(kernel_graph); | |||
| } else { | |||
| DumpJsonParser::GetInstance().UpdateDumpIter(); | |||
| } | |||
| if (debugger_) { | |||
| debugger_->PostExecute(); | |||
| } | |||
| @@ -600,6 +602,13 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, | |||
| } | |||
| } | |||
| void GPUSession::DumpSetup(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| MS_LOG(INFO) << "Start!"; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| E2eDump::DumpSetup(kernel_graph.get(), rank_id_); | |||
| MS_LOG(INFO) << "Finish!"; | |||
| } | |||
| void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| if (debugger_->DebuggerBackendEnabled()) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| @@ -86,6 +86,8 @@ class GPUSession : public SessionBasic { | |||
| void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| void DumpSetup(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| bool DumpDataEnabledIteration() const; | |||
| GraphId CompileGraphImpl(KernelGraphPtr kernel_graph); | |||
| @@ -206,16 +206,6 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s | |||
| } | |||
| void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) { | |||
| auto common_dump_settings = CheckJsonKeyExist(content, kCommonDumpSettings); | |||
| auto dump_mode = CheckJsonKeyExist(*common_dump_settings, kDumpMode); | |||
| auto path = CheckJsonKeyExist(*common_dump_settings, kPath); | |||
| auto net_name = CheckJsonKeyExist(*common_dump_settings, kNetName); | |||
| auto iteration = CheckJsonKeyExist(*common_dump_settings, kIteration); | |||
| auto input_output = CheckJsonKeyExist(*common_dump_settings, kInputOutput); | |||
| auto kernels = CheckJsonKeyExist(*common_dump_settings, kKernels); | |||
| auto support_device = CheckJsonKeyExist(*common_dump_settings, kSupportDevice); | |||
| auto op_debug_mode = CheckJsonKeyExist(*common_dump_settings, kOpDebugMode); | |||
| // async_dump is enabled by default, if e2e dump is enabled it will override this | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| @@ -228,6 +218,20 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) { | |||
| } | |||
| } | |||
| auto common_dump_settings = CheckJsonKeyExist(content, kCommonDumpSettings); | |||
| auto dump_mode = CheckJsonKeyExist(*common_dump_settings, kDumpMode); | |||
| auto path = CheckJsonKeyExist(*common_dump_settings, kPath); | |||
| auto net_name = CheckJsonKeyExist(*common_dump_settings, kNetName); | |||
| auto iteration = CheckJsonKeyExist(*common_dump_settings, kIteration); | |||
| auto input_output = CheckJsonKeyExist(*common_dump_settings, kInputOutput); | |||
| auto kernels = CheckJsonKeyExist(*common_dump_settings, kKernels); | |||
| auto support_device = CheckJsonKeyExist(*common_dump_settings, kSupportDevice); | |||
| nlohmann::detail::iter_impl<const nlohmann::json> op_debug_mode; | |||
| if (async_dump_enabled_) { | |||
| op_debug_mode = CheckJsonKeyExist(*common_dump_settings, kOpDebugMode); | |||
| } | |||
| ParseDumpMode(*dump_mode); | |||
| ParseDumpPath(*path); | |||
| ParseNetName(*net_name); | |||
| @@ -235,7 +239,9 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) { | |||
| ParseInputOutput(*input_output); | |||
| ParseKernels(*kernels); | |||
| ParseSupportDevice(*support_device); | |||
| ParseOpDebugMode(*op_debug_mode); | |||
| if (async_dump_enabled_) { | |||
| ParseOpDebugMode(*op_debug_mode); | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseE2eDumpSetting(const nlohmann::json &content) { | |||
| @@ -60,7 +60,6 @@ class DumpJsonParser { | |||
| bool OutputNeedDump() const; | |||
| std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const; | |||
| void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph); | |||
| bool AsyncDumpEnabled() const { return async_dump_enabled_; } | |||
| private: | |||
| DumpJsonParser() = default; | |||
| @@ -240,7 +240,17 @@ void E2eDump::DumpParametersAndConst(const session::KernelGraph *graph, const st | |||
| void E2eDump::DumpSetup(const session::KernelGraph *graph, uint32_t rank_id) { | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| uint32_t cur_iter = dump_json_parser.cur_dump_iter(); | |||
| if (dump_json_parser.AsyncDumpEnabled() && dump_json_parser.IsDumpIter(cur_iter)) { | |||
| uint32_t graph_id = graph->graph_id(); | |||
| if (dump_json_parser.async_dump_enabled() || dump_json_parser.e2e_dump_enabled()) { | |||
| if (starting_graph_id == INT32_MAX) { | |||
| starting_graph_id = graph_id; | |||
| } else if (starting_graph_id == graph_id) { | |||
| dump_json_parser.UpdateDumpIter(); | |||
| } | |||
| } | |||
| if (dump_json_parser.async_dump_enabled() && dump_json_parser.IsDumpIter(cur_iter)) { | |||
| auto zero_dir_dump_path = | |||
| dump_json_parser.path() + "/rank_" + std::to_string(rank_id) + "/_/" + std::to_string(graph->graph_id()) + "/0"; | |||
| @@ -291,7 +301,7 @@ bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, cons | |||
| DumpOutput(graph, dump_path, debugger); | |||
| DumpParametersAndConst(graph, dump_path, debugger); | |||
| success = true; | |||
| } else if (dump_json_parser.AsyncDumpEnabled()) { | |||
| } else if (dump_json_parser.async_dump_enabled()) { | |||
| uint32_t current_iter = dump_json_parser.cur_dump_iter(); | |||
| auto zero_dir_dump_path = | |||
| @@ -336,14 +346,6 @@ bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, cons | |||
| success = true; | |||
| } | |||
| if (starting_graph_id == INT32_MAX) { | |||
| starting_graph_id = graph_id; | |||
| } else { | |||
| if (starting_graph_id == graph_id) { | |||
| dump_json_parser.UpdateDumpIter(); | |||
| } | |||
| } | |||
| return success; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -634,6 +634,22 @@ void DebugServices::AddToTensorData(const std::string &backend_name, const std:: | |||
| result_list->push_back(tensor_data); | |||
| } | |||
| void DebugServices::SetPrefixToCheck(std::string *prefix_dump_file_name, std::string *prefix_dump_file_name_input, | |||
| std::string *prefix_dump_file_name_output, std::string *dump_style_kernel_name, | |||
| size_t slot) { | |||
| if (is_sync_mode) { | |||
| std::string dump_style_name_part = *dump_style_kernel_name; | |||
| std::size_t last_scope_marker = dump_style_kernel_name->rfind("--"); | |||
| if (last_scope_marker != std::string::npos) { | |||
| dump_style_name_part = dump_style_kernel_name->substr(last_scope_marker + 2); | |||
| } | |||
| *prefix_dump_file_name_input = dump_style_name_part + ".input." + std::to_string(slot); | |||
| *prefix_dump_file_name_output = dump_style_name_part + ".output." + std::to_string(slot); | |||
| } else { | |||
| *prefix_dump_file_name = *dump_style_kernel_name; | |||
| } | |||
| } | |||
| void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot, | |||
| std::vector<unsigned int> device_id, std::vector<unsigned int> iteration, | |||
| std::vector<unsigned int> root_graph_id, | |||
| @@ -665,12 +681,10 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std: | |||
| } | |||
| std::string prefix_dump_file_name; | |||
| if (is_sync_mode) { | |||
| prefix_dump_file_name = dump_style_kernel_name.substr(dump_style_kernel_name.rfind("--") + 2); | |||
| prefix_dump_file_name += ".output." + std::to_string(slot[i]); | |||
| } else { | |||
| prefix_dump_file_name = dump_style_kernel_name; | |||
| } | |||
| std::string prefix_dump_file_name_input; | |||
| std::string prefix_dump_file_name_output; | |||
| SetPrefixToCheck(&prefix_dump_file_name, &prefix_dump_file_name_input, &prefix_dump_file_name_output, | |||
| &dump_style_kernel_name, slot[i]); | |||
| std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" + | |||
| std::to_string(root_graph_id[i]) + "/" + std::to_string(iteration[i]); | |||
| @@ -701,7 +715,11 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std: | |||
| std::string start_string = file_name.substr(first_dot + 1, second_dot - first_dot - 1); | |||
| std::string end_string = file_name.substr(fifth_dot, seventh_dot - fifth_dot); | |||
| std::string stripped_file_name = start_string + end_string; | |||
| std::size_t found = stripped_file_name.rfind(prefix_dump_file_name, 0); | |||
| std::size_t found = stripped_file_name.rfind(prefix_dump_file_name_output, 0); | |||
| if (found == std::string::npos) { | |||
| found = stripped_file_name.rfind(prefix_dump_file_name_input, 0); | |||
| } | |||
| if (found != 0) { | |||
| continue; | |||
| @@ -810,14 +828,25 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors( | |||
| for (auto node : wp_nodes) { | |||
| std::string orig_name = node; | |||
| std::string dump_style_name = node; | |||
| std::string dump_style_name_input; | |||
| std::string dump_style_name_output; | |||
| ReplaceSrcFileName(is_sync_mode, &dump_style_name); | |||
| if (is_sync_mode) { | |||
| dump_style_name = dump_style_name.substr(dump_style_name.rfind("--") + 2); | |||
| dump_style_name.append(".output."); | |||
| } | |||
| std::string dump_style_name_part = dump_style_name; | |||
| std::size_t last_scope_marker = dump_style_name.rfind("--"); | |||
| if (last_scope_marker != std::string::npos) { | |||
| dump_style_name_part = dump_style_name.substr(last_scope_marker + 2); | |||
| } | |||
| dump_style_name_input = dump_style_name_part + ".input."; | |||
| proto_to_dump.push_back(std::tuple<std::string, std::string>(orig_name, dump_style_name_input)); | |||
| proto_to_dump.push_back(std::tuple<std::string, std::string>(orig_name, dump_style_name)); | |||
| dump_style_name_output = dump_style_name_part + ".output."; | |||
| proto_to_dump.push_back(std::tuple<std::string, std::string>(orig_name, dump_style_name_output)); | |||
| } else { | |||
| proto_to_dump.push_back(std::tuple<std::string, std::string>(orig_name, dump_style_name)); | |||
| } | |||
| } | |||
| if (!is_sync_mode) { | |||
| @@ -226,6 +226,9 @@ class DebugServices { | |||
| const std::string &type_name, const std::vector<int64_t> &shape, std::vector<char> *buffer, | |||
| std::vector<std::shared_ptr<TensorData>> *result_list); | |||
| void SetPrefixToCheck(std::string *prefix_dump_file_name, std::string *prefix_dump_file_name_input, | |||
| std::string *prefix_dump_file_name_output, std::string *dump_style_kernel_name, size_t slot); | |||
| void ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot, | |||
| std::vector<unsigned int> device_id, std::vector<unsigned int> iteration, | |||
| std::vector<unsigned int> root_graph_id, const std::vector<std::string> &async_file_pool, | |||
| @@ -148,6 +148,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| bool TensorExistsInCurrent(const std::string &tensor_name); | |||
| // check if dump using debugger backend is enabled | |||
| bool CheckDebuggerDumpEnabled() const; | |||
| private: | |||
| // private constructor for singleton | |||
| Debugger(); | |||
| @@ -159,9 +162,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| void SetOpOverflowBinPath(uint32_t graph_id); | |||
| // check if dump using debugger backend is enabled | |||
| bool CheckDebuggerDumpEnabled() const; | |||
| // check if debugger enabled | |||
| bool CheckDebuggerEnabled() const; | |||
| @@ -218,8 +218,10 @@ std::string GetTensorFullName(tensor_info_t info) { | |||
| if (info.is_parameter) { | |||
| // scopes in node name are separated by '/' | |||
| // use the name without scope if truncate is true | |||
| std::size_t found = node_name.find_last_of("/"); | |||
| node_name = node_name.substr(found + 1); | |||
| auto found = node_name.find_last_of("/"); | |||
| if (found != std::string::npos) { | |||
| node_name = node_name.substr(found + 1); | |||
| } | |||
| } | |||
| return node_name + ":" + std::to_string(info.slot); | |||
| } | |||