Merge pull request !5803 from caifubi/dumptags/v1.0.0
| @@ -23,7 +23,7 @@ usage() | |||
| { | |||
| echo "Usage:" | |||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | |||
| echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-a on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I arm64|arm32|x86_64] [-K] \\" | |||
| echo " [-B on|off] [-w on|off] [-E] [-l on|off] [-n full|lite|off]" | |||
| echo "" | |||
| @@ -46,7 +46,6 @@ usage() | |||
| echo " -j[n] Set the threads when building (Default: -j8)" | |||
| echo " -e Use gpu, d or cpu" | |||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | |||
| echo " -Q Enable dump memory, default off" | |||
| echo " -D Enable dumping of function graph ir, default on" | |||
| echo " -z Compile dataset & mindrecord, default on" | |||
| echo " -n Compile minddata with mindspore lite, available: off, lite, full, default is lite" | |||
| @@ -89,7 +88,6 @@ checkopts() | |||
| ENABLE_LOAD_IR="off" | |||
| ENABLE_TIMELINE="off" | |||
| ENABLE_DUMP2PROTO="on" | |||
| ENABLE_DUMPE2E="off" | |||
| ENABLE_DUMP_IR="on" | |||
| COMPILE_MINDDATA="on" | |||
| COMPILE_MINDDATA_LITE="off" | |||
| @@ -108,7 +106,7 @@ checkopts() | |||
| ENABLE_GPU="off" | |||
| # Process the options | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:swB:En:T:' opt | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:D:zM:V:K:swB:En:T:' opt | |||
| do | |||
| OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | |||
| case "${opt}" in | |||
| @@ -229,11 +227,6 @@ checkopts() | |||
| ENABLE_DUMP2PROTO="$OPTARG" | |||
| echo "enable dump anf graph to proto file" | |||
| ;; | |||
| Q) | |||
| check_on_off $OPTARG Q | |||
| ENABLE_DUMPE2E="$OPTARG" | |||
| echo "enable dump end to end" | |||
| ;; | |||
| D) | |||
| check_on_off $OPTARG D | |||
| ENABLE_DUMP_IR="$OPTARG" | |||
| @@ -301,9 +294,6 @@ checkopts() | |||
| done | |||
| } | |||
| checkopts "$@" | |||
| if [[ "X$ENABLE_GPU" = "Xon" ]] && [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then | |||
| ENABLE_DEBUGGER="on" | |||
| fi | |||
| echo "---------------- MindSpore: build start ----------------" | |||
| mkdir -pv "${BUILD_PATH}/package/mindspore/lib" | |||
| git submodule update --init graphengine | |||
| @@ -350,9 +340,6 @@ build_mindspore() | |||
| if [[ "X$ENABLE_DUMP2PROTO" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_PROTO=ON" | |||
| fi | |||
| if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | |||
| fi | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | |||
| if [[ "X$ENABLE_MPI" = "Xon" ]]; then | |||
| @@ -13,7 +13,6 @@ option(USE_GLOG "Use glog to output log" OFF) | |||
| option(ENABLE_PROFILE "Enable pipeline profile, default off" OFF) | |||
| option(ENABLE_TIMELINE "Enable time line record" OFF) | |||
| option(ENABLE_DUMP_PROTO "Enable dump anf graph to file in ProtoBuffer format, default on" ON) | |||
| option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF) | |||
| option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON) | |||
| option(ENABLE_MPI "enable mpi" OFF) | |||
| option(ENABLE_AKG "enable akg" OFF) | |||
| @@ -116,10 +115,6 @@ if(ENABLE_MINDDATA) | |||
| endif() | |||
| endif() | |||
| if(ENABLE_DUMP_E2E) | |||
| add_compile_definitions(ENABLE_DUMP_E2E) | |||
| endif() | |||
| if(ENABLE_DEBUGGER) | |||
| add_compile_definitions(ENABLE_DEBUGGER) | |||
| endif() | |||
| @@ -1,17 +1,19 @@ | |||
| { | |||
| "DumpSettings": { | |||
| "common_dump_settings": { | |||
| "dump_mode": 0, | |||
| "path": "/test", | |||
| "net_name": "ResNet50", | |||
| "dump_mode": 1, | |||
| "op_debug_mode": 3, | |||
| "iteration": 0, | |||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||
| "input_output": 0, | |||
| "kernels": ["Default/Conv-op12"], | |||
| "support_device": [0,1,2,3,4,5,6,7] | |||
| }, | |||
| "DumpSettingsSpec": { | |||
| "net_name": "net name eg:ResNet50", | |||
| "dump_mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||
| "op_debug_mode": "0: close debug, 1: debug ai-core overflow, 2: debug atomic overflow, 3: debug all overflow", | |||
| "iteration": "specified iteration ", | |||
| "kernels": "op's full scope name which need to be dump" | |||
| "e2e_dump_settings": { | |||
| "enable": false, | |||
| "trans_flag": false | |||
| }, | |||
| "async_dump_settings": { | |||
| "enable": false, | |||
| "op_debug_mode": 0 | |||
| } | |||
| } | |||
| @@ -21,7 +21,7 @@ | |||
| #include <memory> | |||
| #include "framework/ge_runtime/task_info.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "debug/data_dump_parser.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | |||
| namespace mindspore { | |||
| @@ -32,7 +32,9 @@ class AscendKernelMod : public KernelMod { | |||
| const std::vector<AddressPtr> &, uint32_t) = 0; | |||
| uint32_t block_dim() { return block_dim_; } | |||
| uint32_t stream_id() { return stream_id_; } | |||
| virtual bool NeedDump() { return DataDumpParser::GetInstance().NeedDump(kernel_name_); } | |||
| virtual bool NeedDump() { | |||
| return DumpJsonParser::GetInstance().NeedDump(kernel_name_) && DumpJsonParser::GetInstance().async_dump_enabled(); | |||
| } | |||
| protected: | |||
| uint32_t block_dim_{1}; | |||
| @@ -38,8 +38,10 @@ | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "runtime/device/kernel_runtime_manager.h" | |||
| #include "utils/config_manager.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "debug/tensor_load.h" | |||
| #include "backend/optimizer/graph_kernel/basic_ops_fusion.h" | |||
| #include "debug/data_dump/e2e_dump_util.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "debug/dump_proto.h" | |||
| @@ -329,8 +331,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor:: | |||
| LoadTensor(kernel_graph); | |||
| } | |||
| #endif | |||
| // dump used for debug | |||
| Dump(kernel_graph); | |||
| #ifdef ENABLE_DEBUGGER | |||
| // debugger post-execution processing | |||
| if (debugger_) { | |||
| @@ -565,6 +565,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| bool ret_ok = runtime_instance->Run(kernel_graph.get(), is_task_sink); | |||
| Dump(kernel_graph); | |||
| if (!ret_ok) { | |||
| MS_LOG(EXCEPTION) << "run task error!"; | |||
| } | |||
| @@ -574,9 +575,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo | |||
| void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| MS_LOG(INFO) << "Start!"; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| (void)runtime_instance->DumpData(kernel_graph.get()); | |||
| E2eDumpUtil::DumpData(kernel_graph.get()); | |||
| MS_LOG(INFO) << "Finish!"; | |||
| } | |||
| @@ -47,6 +47,7 @@ | |||
| #include "utils/ms_utils.h" | |||
| #include "common/trans.h" | |||
| #include "utils/ms_context.h" | |||
| #include "debug/data_dump/e2e_dump_util.h" | |||
| #include "debug/tensor_load.h" | |||
| #include "debug/dump_proto.h" | |||
| @@ -350,14 +351,10 @@ void GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info | |||
| #ifdef ENABLE_DEBUGGER | |||
| void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| #ifdef ENABLE_DUMP_E2E | |||
| if (debugger_->DebuggerBackendEnabled()) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| (void)runtime_instance->DumpData(kernel_graph.get(), debugger_.get()); | |||
| E2eDumpUtil::DumpData(kernel_graph.get(), debugger_.get()); | |||
| } | |||
| #endif | |||
| } | |||
| bool GPUSession::DumpDataEnabledIteration() const { | |||
| @@ -16,16 +16,11 @@ if (ENABLE_DEBUGGER) | |||
| ) | |||
| endif (ENABLE_DEBUGGER) | |||
| if (ENABLE_D) | |||
| list(APPEND _DEBUG_SRC_LIST | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/common.cc" | |||
| ) | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc") | |||
| if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc") | |||
| list(APPEND _DEBUG_SRC_LIST "data_dump/dump_json_parser.cc") | |||
| list(APPEND _DEBUG_SRC_LIST "data_dump/e2e_dump_util.cc") | |||
| endif() | |||
| if (ENABLE_DUMP_E2E) | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") | |||
| endif (ENABLE_DUMP_E2E) | |||
| set_property(SOURCE ${_DEBUG_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEBUG) | |||
| add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST}) | |||
| @@ -0,0 +1,401 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include <fstream> | |||
| #include "utils/log_adapter.h" | |||
| #include "debug/common.h" | |||
| #include "utils/ms_context.h" | |||
| #include "utils/convert_utils_base.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
namespace {
// Name of the environment variable that points at the dump configuration file.
constexpr auto kMindsporeDumpConfig = "MINDSPORE_DUMP_CONFIG";

// Top-level sections of the dump configuration json.
constexpr auto kCommonDumpSettings = "common_dump_settings";
constexpr auto kAsyncDumpSettings = "async_dump_settings";
constexpr auto kE2eDumpSettings = "e2e_dump_settings";

// Keys read from "common_dump_settings".
constexpr auto kDumpMode = "dump_mode";
constexpr auto kPath = "path";
constexpr auto kNetName = "net_name";
constexpr auto kIteration = "iteration";
constexpr auto kInputOutput = "input_output";
constexpr auto kKernels = "kernels";
constexpr auto kSupportDevice = "support_device";

// Keys read from the async / e2e sections.
constexpr auto kEnable = "enable";
constexpr auto kOpDebugMode = "op_debug_mode";
constexpr auto kTransFlag = "trans_flag";

// Accepted values of "input_output".
constexpr auto kDumpInputAndOutput = 0;
constexpr auto kDumpInputOnly = 1;
constexpr auto kDumpOutputOnly = 2;
}  // namespace
| namespace mindspore { | |||
| auto DumpJsonParser::CheckJsonKeyExist(const nlohmann::json &content, const std::string &key) { | |||
| auto iter = content.find(key); | |||
| if (iter == content.end()) { | |||
| MS_LOG(EXCEPTION) << "Check dump json failed, " << key << " not found"; | |||
| } | |||
| return iter; | |||
| } | |||
// Drains whatever is left in the stream buffer of `ifstream` (from its current
// position) and returns it as a string.
std::string GetIfstreamString(const std::ifstream &ifstream) {
  std::ostringstream collected;
  collected << ifstream.rdbuf();
  return collected.str();
}
| bool DumpJsonParser::IsDumpEnabled() { | |||
| auto config_path = std::getenv(kMindsporeDumpConfig); | |||
| if (config_path == nullptr) { | |||
| MS_LOG(INFO) << "Dump config path is null"; | |||
| return false; | |||
| } | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { | |||
| MS_LOG(INFO) << "Dump is disabled in PyNative mode"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| void DumpJsonParser::Parse() { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| if (!IsDumpEnabled()) { | |||
| return; | |||
| } | |||
| auto dump_config_file = Common::GetConfigFile(kMindsporeDumpConfig); | |||
| if (!dump_config_file.has_value()) { | |||
| MS_LOG(EXCEPTION) << "Get dump config file failed"; | |||
| } | |||
| std::ifstream json_file(dump_config_file.value()); | |||
| if (!json_file.is_open()) { | |||
| MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."; | |||
| } | |||
| nlohmann::json j; | |||
| try { | |||
| json_file >> j; | |||
| } catch (nlohmann::json::parse_error &e) { | |||
| MS_LOG(ERROR) << "Dump json contents:" << GetIfstreamString(json_file); | |||
| MS_LOG(EXCEPTION) << "Parse dump json failed, error:" << e.what(); | |||
| } | |||
| // convert json to string | |||
| std::stringstream ss; | |||
| ss << j; | |||
| std::string cfg = ss.str(); | |||
| MS_LOG(INFO) << "Dump json:" << cfg; | |||
| ParseCommonDumpSetting(j); | |||
| ParseAsyncDumpSetting(j); | |||
| ParseE2eDumpSetting(j); | |||
| JudgeDumpEnabled(); | |||
| } | |||
| bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, size_t len) { | |||
| if (filename.empty() || data == nullptr || len == 0) { | |||
| MS_LOG(ERROR) << "Incorrect parameter."; | |||
| return false; | |||
| } | |||
| auto realpath = Common::GetRealPath(filename); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed."; | |||
| return false; | |||
| } | |||
| std::ofstream fd; | |||
| fd.open(realpath.value(), std::ios::binary | std::ios::out); | |||
| if (!fd.is_open()) { | |||
| MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; | |||
| return false; | |||
| } | |||
| (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); | |||
| fd.close(); | |||
| return true; | |||
| } | |||
| void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) { | |||
| auto common_dump_settings = CheckJsonKeyExist(content, kCommonDumpSettings); | |||
| auto dump_mode = CheckJsonKeyExist(*common_dump_settings, kDumpMode); | |||
| auto path = CheckJsonKeyExist(*common_dump_settings, kPath); | |||
| auto net_name = CheckJsonKeyExist(*common_dump_settings, kNetName); | |||
| auto iteration = CheckJsonKeyExist(*common_dump_settings, kIteration); | |||
| auto input_output = CheckJsonKeyExist(*common_dump_settings, kInputOutput); | |||
| auto kernels = CheckJsonKeyExist(*common_dump_settings, kKernels); | |||
| auto support_device = CheckJsonKeyExist(*common_dump_settings, kSupportDevice); | |||
| ParseDumpMode(*dump_mode); | |||
| ParseDumpPath(*path); | |||
| ParseNetName(*net_name); | |||
| ParseIteration(*iteration); | |||
| ParseInputOutput(*input_output); | |||
| ParseKernels(*kernels); | |||
| ParseSupportDevice(*support_device); | |||
| } | |||
| void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) { | |||
| // async dump setting is optional | |||
| auto async_dump_setting = content.find(kAsyncDumpSettings); | |||
| if (async_dump_setting == content.end()) { | |||
| MS_LOG(INFO) << "No async_dump_settings"; | |||
| return; | |||
| } | |||
| auto async_dump_enable = CheckJsonKeyExist(*async_dump_setting, kEnable); | |||
| auto op_debug_mode = CheckJsonKeyExist(*async_dump_setting, kOpDebugMode); | |||
| async_dump_enabled_ = ParseEnable(*async_dump_enable); | |||
| ParseOpDebugMode(*op_debug_mode); | |||
| } | |||
| void DumpJsonParser::ParseE2eDumpSetting(const nlohmann::json &content) { | |||
| auto e2e_dump_setting = CheckJsonKeyExist(content, kE2eDumpSettings); | |||
| if (e2e_dump_setting == content.end()) { | |||
| MS_LOG(INFO) << "No e2e_dump_settings"; | |||
| return; | |||
| } | |||
| auto e2e_dump_enable = CheckJsonKeyExist(*e2e_dump_setting, kEnable); | |||
| auto trans_flag = CheckJsonKeyExist(*e2e_dump_setting, kTransFlag); | |||
| e2e_dump_enabled_ = ParseEnable(*e2e_dump_enable); | |||
| trans_flag_ = ParseEnable(*trans_flag); | |||
| } | |||
| void CheckJsonUnsignedType(const nlohmann::json &content, const std::string &key) { | |||
| if (!content.is_number_unsigned()) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be unsigned int type"; | |||
| } | |||
| } | |||
| void CheckJsonStringType(const nlohmann::json &content, const std::string &key) { | |||
| if (!content.is_string()) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be string type"; | |||
| } | |||
| } | |||
| void CheckJsonArrayType(const nlohmann::json &content, const std::string &key) { | |||
| if (!content.is_array()) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be array type"; | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseDumpMode(const nlohmann::json &content) { | |||
| CheckJsonUnsignedType(content, kDumpMode); | |||
| dump_mode_ = content; | |||
| if (dump_mode_ != 0 && dump_mode_ != 1) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed. dump_mode should be 0 or 1"; | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseDumpPath(const nlohmann::json &content) { | |||
| CheckJsonStringType(content, kPath); | |||
| path_ = content; | |||
| if (!std::all_of(path_.begin(), path_.end(), | |||
| [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/'; })) { | |||
| MS_LOG(EXCEPTION) << "Dump path only support alphabets, digit or {'-', '_', '/'}, but got:" << path_; | |||
| } | |||
| if (path_.empty()) { | |||
| MS_LOG(EXCEPTION) << "Dump path is empty"; | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseNetName(const nlohmann::json &content) { | |||
| CheckJsonStringType(content, kNetName); | |||
| net_name_ = content; | |||
| if (!std::all_of(net_name_.begin(), net_name_.end(), | |||
| [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_'; })) { | |||
| MS_LOG(EXCEPTION) << "Dump path only support alphabets, digit or {'-', '_'}, but got:" << net_name_; | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseIteration(const nlohmann::json &content) { | |||
| CheckJsonUnsignedType(content, kIteration); | |||
| iteration_ = content; | |||
| } | |||
| void DumpJsonParser::ParseInputOutput(const nlohmann::json &content) { | |||
| CheckJsonUnsignedType(content, kInputOutput); | |||
| input_output_ = content; | |||
| if (input_output_ < 0 || input_output_ > 2) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed. input_output should be 0, 1, 2"; | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseKernels(const nlohmann::json &content) { | |||
| CheckJsonArrayType(content, kKernels); | |||
| for (const auto &kernel : content) { | |||
| auto kernel_str = kernel.dump(); | |||
| kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); | |||
| MS_LOG(INFO) << "Need dump kernel:" << kernel_str; | |||
| auto ret = kernels_.try_emplace({kernel_str, 0}); | |||
| if (!ret.second) { | |||
| MS_LOG(WARNING) << "Duplicate dump kernel name:" << kernel_str; | |||
| } | |||
| } | |||
| } | |||
| void DumpJsonParser::ParseSupportDevice(const nlohmann::json &content) { | |||
| CheckJsonArrayType(content, kSupportDevice); | |||
| for (const auto &device : content) { | |||
| uint32_t device_id = device; | |||
| MS_LOG(INFO) << "Dump support device:" << device_id; | |||
| auto ret = support_devices_.emplace(device_id); | |||
| if (!ret.second) { | |||
| MS_LOG(WARNING) << "Duplicate support device:" << device_id; | |||
| } | |||
| } | |||
| } | |||
| bool DumpJsonParser::ParseEnable(const nlohmann::json &content) { | |||
| if (!content.is_boolean()) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed. 'enable' should be boolean type"; | |||
| } | |||
| return content; | |||
| } | |||
| void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) { | |||
| CheckJsonUnsignedType(content, kOpDebugMode); | |||
| op_debug_mode_ = content; | |||
| if (op_debug_mode_ < 0 || op_debug_mode_ > 3) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed. op_debug_mode should be 0, 1, 2, 3"; | |||
| } | |||
| } | |||
| void DumpJsonParser::JudgeDumpEnabled() { | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) { | |||
| async_dump_enabled_ = false; | |||
| } | |||
| if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice) { | |||
| if (async_dump_enabled_ && e2e_dump_enabled_) { | |||
| async_dump_enabled_ = false; | |||
| MS_LOG(INFO) << "Disable async dump"; | |||
| } | |||
| } | |||
| if (!async_dump_enabled_ && !e2e_dump_enabled_) { | |||
| MS_LOG(WARNING) << "Dump json parse failed. Dump not enabled"; | |||
| } | |||
| auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| if (support_devices_.find(device_id) == support_devices_.end()) { | |||
| async_dump_enabled_ = false; | |||
| e2e_dump_enabled_ = false; | |||
| MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support"; | |||
| } | |||
| context->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_); | |||
| } | |||
| bool DumpJsonParser::NeedDump(const std::string &op_full_name) const { | |||
| if (dump_mode_ == 0) { | |||
| return true; | |||
| } | |||
| auto iter = kernels_.find(op_full_name); | |||
| return iter != kernels_.end(); | |||
| } | |||
| void DumpJsonParser::MatchKernel(const std::string &kernel_name) { | |||
| auto iter = kernels_.find(kernel_name); | |||
| if (iter == kernels_.end()) { | |||
| return; | |||
| } | |||
| iter->second = iter->second + 1; | |||
| MS_LOG(INFO) << "Match dump kernel:" << iter->first << " match times:" << iter->second; | |||
| } | |||
| void DumpJsonParser::PrintUnusedKernel() { | |||
| for (const auto &iter : kernels_) { | |||
| if (iter.second == 0) { | |||
| MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << iter.first; | |||
| } | |||
| } | |||
| } | |||
| std::string DumpJsonParser::GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const { | |||
| std::string bin_path = "/var/log/npu/ide_daemon/dump"; | |||
| const char *dump_data_path = std::getenv("DATA_DUMP_PATH"); | |||
| if (dump_data_path != nullptr) { | |||
| bin_path.append(dump_data_path); | |||
| bin_path.append("_"); | |||
| } | |||
| bin_path.append(std::to_string(device_id)); | |||
| bin_path.append("/"); | |||
| bin_path.append(net_name_); | |||
| bin_path.append("_"); | |||
| bin_path.append(std::to_string(graph_id)); | |||
| bin_path.append("/"); | |||
| bin_path.append(std::to_string(dump_mode_)); | |||
| bin_path.append("/"); | |||
| bin_path.append(std::to_string(iteration_)); | |||
| bin_path.append("/"); | |||
| return bin_path; | |||
| } | |||
| bool DumpJsonParser::InputNeedDump() const { | |||
| return input_output_ == kDumpInputAndOutput || input_output_ == kDumpInputOnly; | |||
| } | |||
| bool DumpJsonParser::OutputNeedDump() const { | |||
| return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly; | |||
| } | |||
| bool NeedAsyncDump(const CNodePtr &kernel) { | |||
| if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && | |||
| AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { | |||
| return false; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| // dump all kernel if mode is set 0 in data_dump.json | |||
| return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope()); | |||
| } | |||
| void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) { | |||
| if (e2e_dump_enabled_) { | |||
| MS_LOG(INFO) << "E2e dump no need to update dump kernel list"; | |||
| } | |||
| std::map<std::string, uint32_t> update_kernels; | |||
| for (const auto &kernel : kernel_graph->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL && | |||
| DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) { | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| for (size_t i = 0; i < input_size; ++i) { | |||
| auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||
| auto input = input_with_index.first; | |||
| if (input->isa<CNode>()) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope() | |||
| << " Input:" << input->fullname_with_scope(); | |||
| update_kernels.try_emplace(input->fullname_with_scope(), 0); | |||
| } | |||
| } | |||
| } else if (NeedAsyncDump(kernel)) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope(); | |||
| update_kernels.try_emplace(kernel->fullname_with_scope(), 0); | |||
| } | |||
| } | |||
| kernels_.insert(update_kernels.begin(), update_kernels.end()); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,96 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_ | |||
| #include <string> | |||
| #include <map> | |||
| #include <set> | |||
| #include <mutex> | |||
| #include "nlohmann/json.hpp" | |||
| #include "utils/ms_utils.h" | |||
| #include "backend/session/kernel_graph.h" | |||
| namespace mindspore { | |||
| class DumpJsonParser { | |||
| public: | |||
| static DumpJsonParser &GetInstance() { | |||
| static DumpJsonParser instance; | |||
| return instance; | |||
| } | |||
| void Parse(); | |||
| static bool DumpToFile(const std::string &filename, const void *data, size_t len); | |||
| bool NeedDump(const std::string &op_full_name) const; | |||
| void MatchKernel(const std::string &kernel_name); | |||
| void PrintUnusedKernel(); | |||
| bool async_dump_enabled() const { return async_dump_enabled_; } | |||
| bool e2e_dump_enabled() const { return e2e_dump_enabled_; } | |||
| uint32_t dump_mode() const { return dump_mode_; } | |||
| std::string path() const { return path_; } | |||
| std::string net_name() const { return net_name_; } | |||
| uint32_t iteration() const { return iteration_; } | |||
| uint32_t input_output() const { return input_output_; } | |||
| uint32_t op_debug_mode() const { return op_debug_mode_; } | |||
| bool trans_flag() const { return trans_flag_; } | |||
| uint32_t cur_dump_iter() { return cur_dump_iter_; } | |||
| void UpdateDumpIter() { ++cur_dump_iter_; } | |||
| bool InputNeedDump() const; | |||
| bool OutputNeedDump() const; | |||
| std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const; | |||
| void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph); | |||
| private: | |||
| DumpJsonParser() = default; | |||
| ~DumpJsonParser() = default; | |||
| DISABLE_COPY_AND_ASSIGN(DumpJsonParser) | |||
| std::mutex lock_; | |||
| bool async_dump_enabled_{false}; | |||
| bool e2e_dump_enabled_{false}; | |||
| uint32_t dump_mode_{0}; | |||
| std::string path_; | |||
| std::string net_name_; | |||
| uint32_t iteration_{0}; | |||
| uint32_t input_output_{0}; | |||
| std::map<std::string, uint32_t> kernels_; | |||
| std::set<uint32_t> support_devices_; | |||
| uint32_t op_debug_mode_{0}; | |||
| bool trans_flag_{false}; | |||
| uint32_t cur_dump_iter_{0}; | |||
| void ParseCommonDumpSetting(const nlohmann::json &content); | |||
| void ParseAsyncDumpSetting(const nlohmann::json &content); | |||
| void ParseE2eDumpSetting(const nlohmann::json &content); | |||
| bool IsDumpEnabled(); | |||
| auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key); | |||
| void ParseDumpMode(const nlohmann::json &content); | |||
| void ParseDumpPath(const nlohmann::json &content); | |||
| void ParseNetName(const nlohmann::json &content); | |||
| void ParseIteration(const nlohmann::json &content); | |||
| void ParseInputOutput(const nlohmann::json &content); | |||
| void ParseKernels(const nlohmann::json &content); | |||
| void ParseSupportDevice(const nlohmann::json &content); | |||
| bool ParseEnable(const nlohmann::json &content); | |||
| void ParseOpDebugMode(const nlohmann::json &content); | |||
| void JudgeDumpEnabled(); | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_ | |||
| @@ -0,0 +1,222 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/data_dump/e2e_dump_util.h" | |||
| #include <algorithm> | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "common/trans.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/ms_context.h" | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/debug_services.h" | |||
| #include "debug/tensor_load.h" | |||
| #include "debug/debugger/debugger.h" | |||
| #endif | |||
namespace {
// File-local constant with value 0.
constexpr size_t PRAMATER_OUTPUT_INDEX{0};
}  // namespace
| namespace mindspore { | |||
| void E2eDumpUtil::GetFileKernelName(NotNull<std::string *> kernel_name) { | |||
| const std::string strsrc = "/"; | |||
| const std::string strdst = "--"; | |||
| std::string::size_type pos = 0; | |||
| std::string::size_type srclen = strsrc.size(); | |||
| std::string::size_type dstlen = strdst.size(); | |||
| while ((pos = kernel_name->find(strsrc, pos)) != std::string::npos) { | |||
| kernel_name->replace(pos, srclen, strdst); | |||
| pos += dstlen; | |||
| } | |||
| } | |||
| bool E2eDumpUtil::IsDeviceTargetGPU() { | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice; | |||
| } | |||
| void E2eDumpUtil::DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr, | |||
| bool trans_flag, const ShapeVector &int_shapes, const TypeId &type) { | |||
| auto format = kOpFormat_DEFAULT; | |||
| auto ret = addr->DumpMemToFile(trans_flag, file_path, format, int_shapes, type); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << file_path << ", host_format:" << format | |||
| << ".!"; | |||
| } | |||
| } | |||
| void E2eDumpUtil::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name, | |||
| NotNull<const device::DeviceAddress *> addr, bool trans_flag, | |||
| const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| auto format = kOpFormat_DEFAULT; | |||
| DebugServices *debug_services = debugger->debug_services(); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, type, | |||
| addr->type_id(), addr->format(), slot); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "DumpTensorToFile Failed: flag:" << std::to_string(trans_flag) << ", path:" << file_path | |||
| << ", host_format:" << format; | |||
| } | |||
| #endif | |||
| } | |||
| void E2eDumpUtil::GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag, | |||
| NotNull<ShapeVector *> int_shapes) { | |||
| if (trans_flag) { | |||
| *int_shapes = trans::GetRuntimePaddingShape(node, index); | |||
| } else { | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(node, index); | |||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(*int_shapes), | |||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | |||
| } | |||
| } | |||
| void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| if (!dump_json_parser.OutputNeedDump()) { | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Start e2e dump output"; | |||
| bool trans_flag = dump_json_parser.trans_flag(); | |||
| const auto &apply_kernels = graph->execution_order(); | |||
| for (const auto &node : apply_kernels) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto node_name = AnfAlgo::GetCNodeName(node); | |||
| std::string kernel_name = node->fullname_with_scope(); | |||
| if (!dump_json_parser.NeedDump(kernel_name)) { | |||
| continue; | |||
| } | |||
| DumpJsonParser::GetInstance().MatchKernel(kernel_name); | |||
| GetFileKernelName(NOT_NULL(&kernel_name)); | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(node); | |||
| for (size_t j = 0; j < output_size; ++j) { | |||
| auto addr = AnfAlgo::GetOutputAddr(node, j); | |||
| ShapeVector int_shapes; | |||
| GetDumpIntShape(node, j, trans_flag, NOT_NULL(&int_shapes)); | |||
| auto type = AnfAlgo::GetOutputInferDataType(node, j); | |||
| std::string file_path = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j); | |||
| if (IsDeviceTargetGPU()) { | |||
| DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j, | |||
| debugger); | |||
| } else { | |||
| DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| if (!dump_json_parser.InputNeedDump()) { | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Start e2e dump input"; | |||
| bool trans_flag = dump_json_parser.trans_flag(); | |||
| const auto &apply_kernels = graph->execution_order(); | |||
| for (const auto &node : apply_kernels) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto node_name = AnfAlgo::GetCNodeName(node); | |||
| std::string kernel_name = node->fullname_with_scope(); | |||
| if (!dump_json_parser.NeedDump(kernel_name)) { | |||
| continue; | |||
| } | |||
| DumpJsonParser::GetInstance().MatchKernel(kernel_name); | |||
| GetFileKernelName(NOT_NULL(&kernel_name)); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(node); | |||
| for (size_t j = 0; j < input_size; ++j) { | |||
| auto kernel_with_index = AnfAlgo::GetPrevNodeOutput(node, j); | |||
| auto input = kernel_with_index.first; | |||
| auto index = kernel_with_index.second; | |||
| auto addr = AnfAlgo::GetOutputAddr(input, index); | |||
| ShapeVector int_shapes; | |||
| GetDumpIntShape(input, index, trans_flag, NOT_NULL(&int_shapes)); | |||
| auto type = AnfAlgo::GetOutputInferDataType(input, index); | |||
| std::string file_path = dump_path + '/' + kernel_name + '_' + "input_" + std::to_string(j); | |||
| if (IsDeviceTargetGPU()) { | |||
| DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j, | |||
| debugger); | |||
| } else { | |||
| DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void E2eDumpUtil::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| MS_LOG(INFO) << "Start e2e dump parameters"; | |||
| bool trans_flag = dump_json_parser.trans_flag(); | |||
| const auto ¶meters = graph->inputs(); | |||
| for (auto &item : parameters) { | |||
| if (!item->isa<Parameter>()) { | |||
| continue; | |||
| } | |||
| std::string parameter_name = item->fullname_with_scope(); | |||
| if (!dump_json_parser.NeedDump(parameter_name)) { | |||
| continue; | |||
| } | |||
| DumpJsonParser::GetInstance().MatchKernel(parameter_name); | |||
| auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX); | |||
| ShapeVector int_shapes; | |||
| GetDumpIntShape(item, PRAMATER_OUTPUT_INDEX, trans_flag, NOT_NULL(&int_shapes)); | |||
| auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX); | |||
| std::string file_path = dump_path + '/' + parameter_name + '_' + "output_0"; | |||
| if (IsDeviceTargetGPU()) { | |||
| DumpGPUMemToFile(file_path, parameter_name, NOT_NULL(addr), trans_flag, int_shapes, type, 0, debugger); | |||
| } else { | |||
| DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type); | |||
| } | |||
| } | |||
| } | |||
| bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| dump_json_parser.UpdateDumpIter(); | |||
| auto dump_flag = dump_json_parser.e2e_dump_enabled(); | |||
| if (!dump_flag) { | |||
| MS_LOG(INFO) << "E2e dump is disabled, skip dump step"; | |||
| return true; | |||
| } | |||
| if (dump_json_parser.iteration() != 0) { | |||
| if (dump_json_parser.cur_dump_iter() != dump_json_parser.iteration()) { | |||
| return true; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter(); | |||
| std::string net_name = dump_json_parser.net_name(); | |||
| std::string iterator = std::to_string(dump_json_parser.cur_dump_iter()); | |||
| std::string dump_path = dump_json_parser.path(); | |||
| if (dump_path.back() == '/') { | |||
| dump_path = dump_path + net_name + '/' + iterator; | |||
| } else { | |||
| dump_path = dump_path + '/' + net_name + '/' + iterator; | |||
| } | |||
| DumpInput(graph, dump_path, debugger); | |||
| DumpOutput(graph, dump_path, debugger); | |||
| DumpParameters(graph, dump_path, debugger); | |||
| return true; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_ | |||
| #include <string> | |||
| #include "backend/session/kernel_graph.h" | |||
| #include "runtime/device/device_address.h" | |||
| #ifndef ENABLE_DEBUGGER | |||
| class Debugger; | |||
| #endif | |||
| namespace mindspore { | |||
| class E2eDumpUtil { | |||
| public: | |||
| E2eDumpUtil() = default; | |||
| ~E2eDumpUtil() = default; | |||
| static bool DumpData(const session::KernelGraph *graph, Debugger *debugger = nullptr); | |||
| private: | |||
| static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger); | |||
| static void DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger); | |||
| static void DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger); | |||
| static void GetFileKernelName(NotNull<std::string *> kernel_name); | |||
| static void DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr, bool trans_flag, | |||
| const ShapeVector &int_shapes, const TypeId &type); | |||
| static void DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name, | |||
| NotNull<const device::DeviceAddress *> addr, bool trans_flag, | |||
| const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger); | |||
| static void GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag, NotNull<ShapeVector *> int_shapes); | |||
| static bool IsDeviceTargetGPU(); | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_ | |||
| @@ -1,236 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/data_dump_parser.h" | |||
| #include <fstream> | |||
| #include "utils/ms_context.h" | |||
| #include "debug/common.h" | |||
// Names handed to std::getenv / Common::GetConfigFile by the parser.
static constexpr const char *kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
static constexpr const char *kEnableDataDump = "ENABLE_DATA_DUMP";
static constexpr const char *kDataDumpPath = "DATA_DUMP_PATH";
// Keys looked up inside the "DumpSettings" object of the dump configuration json.
static constexpr const char *kConfigDumpMode = "dump_mode";
static constexpr const char *kConfigOpDebugMode = "op_debug_mode";
static constexpr const char *kConfigNetName = "net_name";
static constexpr const char *kConfigIteration = "iteration";
static constexpr const char *kConfigKernels = "kernels";
| namespace mindspore { | |||
| void DataDumpParser::ResetParam() { | |||
| enable_ = false; | |||
| net_name_.clear(); | |||
| dump_mode_ = 0; | |||
| dump_step_ = 0; | |||
| kernel_map_.clear(); | |||
| } | |||
| bool DataDumpParser::DumpEnabled() const { | |||
| auto enable_dump = std::getenv(kEnableDataDump); | |||
| if (enable_dump == nullptr) { | |||
| MS_LOG(INFO) << "[DataDump] enable dump is null. If you want to dump data, please export ENABLE_DATA_DUMP"; | |||
| return false; | |||
| } | |||
| auto enabled = std::atoi(enable_dump); | |||
| if (enabled != 1) { | |||
| MS_LOG(WARNING) << "[DataDump] If you want to dump data, please export ENABLE_DATA_DUMP=1"; | |||
| return false; | |||
| } | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; | |||
| } | |||
| return true; | |||
| } | |||
| std::optional<std::string> DataDumpParser::GetDumpPath() const { | |||
| auto dump_path = std::getenv(kDataDumpPath); | |||
| if (dump_path == nullptr) { | |||
| MS_LOG(ERROR) << "[DataDump] dump path is null. If you want to dump data, please export DATA_DUMP_PATH"; | |||
| return {}; | |||
| } | |||
| std::string dump_path_str(dump_path); | |||
| if (!std::all_of(dump_path_str.begin(), dump_path_str.end(), | |||
| [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/'; })) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] dump path only support alphabets, digit or {'-', '_', '/'}, but got:" | |||
| << dump_path_str; | |||
| } | |||
| return dump_path_str; | |||
| } | |||
// Returns the complete contents of the file behind `ifstream` as a string.
//
// The underlying buffer is rewound first, so the whole file is returned even when the stream
// has already been read from or is in a failed/eof state - which is exactly the situation in
// which this is used to log the text of a config file that failed to parse.
// Returns an empty string when the stream has no open file.
std::string GetIfstreamString(const std::ifstream &ifstream) {
  std::stringstream buffer;
  auto *file_buf = ifstream.rdbuf();
  // Seeking the buffer directly works regardless of the stream's error flags.
  (void)file_buf->pubseekpos(0, std::ios_base::in);
  buffer << file_buf;
  return buffer.str();
}
| void DataDumpParser::ParseDumpConfig() { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| MS_LOG(INFO) << "[DataDump] parse start"; | |||
| if (!DumpEnabled()) { | |||
| MS_LOG(INFO) << "[DataDump] dump not enable"; | |||
| return; | |||
| } | |||
| ResetParam(); | |||
| auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); | |||
| if (!dump_config_file.has_value()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; | |||
| } | |||
| std::ifstream json_file(dump_config_file.value()); | |||
| if (!json_file.is_open()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; | |||
| } | |||
| nlohmann::json j; | |||
| try { | |||
| json_file >> j; | |||
| } catch (nlohmann::json::parse_error &e) { | |||
| MS_LOG(ERROR) << "[DataDump] json contents:" << GetIfstreamString(json_file); | |||
| MS_LOG(EXCEPTION) << "[DataDump] parse json failed, error:" << e.what(); | |||
| } | |||
| if (j.find("DumpSettings") == j.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; | |||
| } | |||
| nlohmann::json dump_settings = j.at("DumpSettings"); | |||
| // convert json to string | |||
| std::stringstream ss; | |||
| ss << dump_settings; | |||
| std::string cfg = ss.str(); | |||
| MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; | |||
| if (!IsConfigExist(dump_settings)) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; | |||
| } | |||
| if (!ParseDumpSetting(dump_settings)) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; | |||
| } | |||
| } | |||
| bool DataDumpParser::NeedDump(const std::string &op_full_name) const { | |||
| if (!DumpEnabled()) { | |||
| return false; | |||
| } | |||
| if (dump_mode_ == 0) { | |||
| return true; | |||
| } | |||
| auto iter = kernel_map_.find(op_full_name); | |||
| return iter != kernel_map_.end(); | |||
| } | |||
| bool CheckConfigKey(const nlohmann::json &dump_settings, const std::string &key) { | |||
| if (dump_settings.find(key) == dump_settings.end()) { | |||
| MS_LOG(ERROR) << "[DataDump] DumpSettings key:" << key << " is not exist."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { | |||
| return CheckConfigKey(dump_settings, kConfigDumpMode) && CheckConfigKey(dump_settings, kConfigNetName) && | |||
| CheckConfigKey(dump_settings, kConfigOpDebugMode) && CheckConfigKey(dump_settings, kConfigIteration) && | |||
| CheckConfigKey(dump_settings, kConfigKernels); | |||
| } | |||
| bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) { | |||
| auto mode = dump_settings.at(kConfigDumpMode); | |||
| auto op_debug_mode = dump_settings.at(kConfigOpDebugMode); | |||
| auto net_name = dump_settings.at(kConfigNetName); | |||
| auto iteration = dump_settings.at(kConfigIteration); | |||
| auto kernels = dump_settings.at(kConfigKernels); | |||
| if (!(mode.is_number_unsigned() && op_debug_mode.is_number_unsigned() && net_name.is_string() && | |||
| iteration.is_number_unsigned() && kernels.is_array())) { | |||
| MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; | |||
| enable_ = false; | |||
| return false; | |||
| } | |||
| CheckDumpMode(mode); | |||
| CheckOpDebugMode(op_debug_mode); | |||
| enable_ = true; | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| dump_mode_ = mode; | |||
| op_debug_mode_ = op_debug_mode; | |||
| net_name_ = net_name; | |||
| dump_step_ = iteration; | |||
| for (const auto &kernel : kernels) { | |||
| auto kernel_str = kernel.dump(); | |||
| kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); | |||
| MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; | |||
| kernel_map_.insert({kernel_str, 0}); | |||
| } | |||
| return true; | |||
| } | |||
| void DataDumpParser::MatchKernel(const std::string &kernel_name) { | |||
| auto iter = kernel_map_.find(kernel_name); | |||
| if (iter == kernel_map_.end()) { | |||
| return; | |||
| } | |||
| iter->second = iter->second + 1; | |||
| MS_LOG(INFO) << "Match dump kernel:" << iter->first << " match times:" << iter->second; | |||
| } | |||
| void DataDumpParser::PrintUnusedKernel() { | |||
| for (const auto &iter : kernel_map_) { | |||
| if (iter.second == 0) { | |||
| MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << iter.first; | |||
| } | |||
| } | |||
| } | |||
| void DataDumpParser::CheckDumpMode(uint32_t dump_mode) const { | |||
| if (dump_mode != 0 && dump_mode != 1) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] dump_mode in config json should be 0 or 1"; | |||
| } | |||
| } | |||
| void DataDumpParser::CheckOpDebugMode(uint32_t op_debug_mode) const { | |||
| if (op_debug_mode < 0 || op_debug_mode > 3) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] op_debug_mode in config json file should be [0-3]"; | |||
| } | |||
| } | |||
| std::string DataDumpParser::GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const { | |||
| std::string bin_path = "/var/log/npu/ide_daemon/dump"; | |||
| const char *dump_data_path = std::getenv("DATA_DUMP_PATH"); | |||
| if (dump_data_path != nullptr) { | |||
| bin_path.append(dump_data_path); | |||
| bin_path.append("_"); | |||
| } | |||
| bin_path.append(std::to_string(device_id)); | |||
| bin_path.append("/"); | |||
| bin_path.append(net_name_); | |||
| bin_path.append("_"); | |||
| bin_path.append(std::to_string(graph_id)); | |||
| bin_path.append("/"); | |||
| bin_path.append(std::to_string(dump_mode_)); | |||
| bin_path.append("/"); | |||
| bin_path.append(std::to_string(dump_step_)); | |||
| bin_path.append("/"); | |||
| return bin_path; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -1,67 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| #define MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| #include <string> | |||
| #include <map> | |||
| #include <mutex> | |||
| #include <optional> | |||
| #include "nlohmann/json.hpp" | |||
| #include "utils/ms_utils.h" | |||
| namespace mindspore { | |||
| class DataDumpParser { | |||
| public: | |||
| static DataDumpParser &GetInstance() { | |||
| static DataDumpParser instance; | |||
| return instance; | |||
| } | |||
| void ParseDumpConfig(); | |||
| bool NeedDump(const std::string &op_full_name) const; | |||
| bool DumpEnabled() const; | |||
| std::optional<std::string> GetDumpPath() const; | |||
| bool enable() const { return enable_; } | |||
| const std::string &net_name() const { return net_name_; } | |||
| uint32_t dump_mode() const { return dump_mode_; } | |||
| uint32_t op_debug_mode() const { return op_debug_mode_; } | |||
| uint32_t dump_step() const { return dump_step_; } | |||
| void MatchKernel(const std::string &kernel_name); | |||
| void PrintUnusedKernel(); | |||
| std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const; | |||
| private: | |||
| DataDumpParser() = default; | |||
| virtual ~DataDumpParser() = default; | |||
| DISABLE_COPY_AND_ASSIGN(DataDumpParser); | |||
| void ResetParam(); | |||
| bool IsConfigExist(const nlohmann::json &dump_settings) const; | |||
| bool ParseDumpSetting(const nlohmann::json &dump_settings); | |||
| void CheckDumpMode(uint32_t dump_mode) const; | |||
| void CheckOpDebugMode(uint32_t op_debug_mode) const; | |||
| std::mutex lock_; | |||
| bool enable_{false}; | |||
| std::string net_name_; | |||
| uint32_t op_debug_mode_{0}; | |||
| uint32_t dump_mode_{0}; | |||
| uint32_t dump_step_{0}; | |||
| std::map<std::string, uint32_t> kernel_map_; | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| @@ -25,7 +25,7 @@ | |||
| #include <utility> | |||
| #include <map> | |||
| #include "debug/debugger/debugger.h" | |||
| #include "debug/data_dump_parser.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "pipeline/jit/pipeline.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "runtime/device/kernel_runtime_manager.h" | |||
| @@ -137,7 +137,7 @@ void Debugger::EnableDebugger() { | |||
| } | |||
| #ifdef ENABLE_D | |||
| // set operation overflow info | |||
| overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_); | |||
| overflow_bin_path_ = DumpJsonParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_); | |||
| // new overflow dump files will have a timestamp greater than last_overflow_bin_ | |||
| last_overflow_bin_ = 0; | |||
| DIR *d; | |||
| @@ -1,178 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/e2e_dump.h" | |||
| #include <limits.h> | |||
| #include <fstream> | |||
| #include <string> | |||
| #include <optional> | |||
| #include <nlohmann/json.hpp> | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/system/file_system.h" | |||
| #include "utils/system/env.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/ms_context.h" | |||
| #include "debug/common.h" | |||
| using json = nlohmann::json; | |||
| namespace mindspore { | |||
| Dump::Dump() | |||
| : dump_enable_(false), | |||
| trans_flag_(false), | |||
| dump_path_("/tmp/"), | |||
| dump_net_name_("net_name"), | |||
| dump_mode_(0), | |||
| dump_iter_(0), | |||
| cur_iter_(0) {} | |||
| bool Dump::IsKernelNeedDump(const std::string &kernel_name) { | |||
| if (dump_mode_ == 0) { | |||
| // Dump All Kernels mode | |||
| return true; | |||
| } else { | |||
| auto iter = std::find(dump_kernels_.begin(), dump_kernels_.end(), kernel_name); | |||
| if (iter != dump_kernels_.end()) { | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| bool Dump::ParseDumpConfig(const std::string &dump_config_file) { | |||
| std::ifstream jsonFile(dump_config_file); | |||
| if (!jsonFile.is_open()) { | |||
| MS_LOG(ERROR) << dump_config_file << " open failed."; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } | |||
| json j; | |||
| jsonFile >> j; | |||
| if (j.find("DumpSettings") == j.end()) { | |||
| MS_LOG(ERROR) << "DumpSettings is not exist."; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } else { | |||
| json dumpSettings = j.at("DumpSettings"); | |||
| // convert json to string | |||
| std::stringstream ss; | |||
| ss << dumpSettings; | |||
| std::string cfg = ss.str(); | |||
| MS_LOG(INFO) << "E2E Dump Settings Json: " << cfg; | |||
| if (!IsConfigExist(dumpSettings)) { | |||
| return false; | |||
| } | |||
| if (!IsConfigValid(dumpSettings)) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| bool Dump::IsConfigExist(const nlohmann::json &dumpSettings) { | |||
| if (dumpSettings.find("trans_flag") == dumpSettings.end() || dumpSettings.find("enable") == dumpSettings.end() || | |||
| dumpSettings.find("mode") == dumpSettings.end() || dumpSettings.find("path") == dumpSettings.end() || | |||
| dumpSettings.find("net_name") == dumpSettings.end() || dumpSettings.find("iteration") == dumpSettings.end() || | |||
| dumpSettings.find("kernels") == dumpSettings.end()) { | |||
| MS_LOG(ERROR) << "DumpSettings keys is not exist."; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool Dump::IsConfigValid(const nlohmann::json &dumpSettings) { | |||
| auto trans_flag = dumpSettings.at("trans_flag"); | |||
| auto enable = dumpSettings.at("enable"); | |||
| auto mode = dumpSettings.at("mode"); | |||
| auto path = dumpSettings.at("path"); | |||
| auto net_name = dumpSettings.at("net_name"); | |||
| auto iteration = dumpSettings.at("iteration"); | |||
| auto kernels = dumpSettings.at("kernels"); | |||
| if (!(enable.is_boolean() && trans_flag.is_boolean() && mode.is_number() && path.is_string() && | |||
| net_name.is_string() && iteration.is_number() && kernels.is_array())) { | |||
| MS_LOG(ERROR) << "Element's type in Dump config json is invalid."; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } | |||
| dump_enable_ = enable; | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| // dump_enable_ is true, close mem reuse | |||
| context_ptr->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !dump_enable_); | |||
| trans_flag_ = trans_flag; | |||
| dump_mode_ = mode; | |||
| dump_path_ = path; | |||
| dump_net_name_ = net_name; | |||
| dump_iter_ = iteration; | |||
| for (const auto &kernel : kernels) { | |||
| dump_kernels_.push_back(kernel); | |||
| } | |||
| return true; | |||
| } | |||
| bool Dump::SetDumpConfFromJsonFile() { | |||
| const char *config_path_str = std::getenv("MINDSPORE_CONFIG_PATH"); | |||
| if (config_path_str != nullptr) { | |||
| MS_LOG(INFO) << "Getenv MINDSPORE_CONFIG_PATH :" << config_path_str; | |||
| } else { | |||
| MS_LOG(INFO) << "No need E2E Dump. please export MINDSPORE_CONFIG_PATH eg: MINDSPORE_CONFIG_PATH=/etc"; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| auto id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (nullptr == realpath(config_path_str, real_path)) { | |||
| MS_LOG(ERROR) << "Env e2e dump path error, " << config_path_str; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } | |||
| std::string dump_config_file = std::string(real_path) + "/e2e_dump_config_" + std::to_string(id) + ".json"; | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| if (!fs->FileExist(dump_config_file)) { | |||
| MS_LOG(ERROR) << dump_config_file << " not exist."; | |||
| dump_enable_ = false; | |||
| return false; | |||
| } | |||
| return ParseDumpConfig(dump_config_file); | |||
| } | |||
| bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) { | |||
| if (filename.empty() || data == nullptr || len == 0) { | |||
| MS_LOG(ERROR) << "Incorrect parameter."; | |||
| return false; | |||
| } | |||
| auto realpath = Common::GetRealPath(filename); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed."; | |||
| return false; | |||
| } | |||
| std::ofstream fd; | |||
| fd.open(realpath.value(), std::ios::binary | std::ios::out); | |||
| if (!fd.is_open()) { | |||
| MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; | |||
| return false; | |||
| } | |||
| (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); | |||
| fd.close(); | |||
| return true; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -1,70 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_E2E_DUMP_H | |||
| #define MINDSPORE_E2E_DUMP_H | |||
| #include <stdint.h> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <nlohmann/json.hpp> | |||
| namespace mindspore { | |||
| class Dump { | |||
| public: | |||
| Dump(); | |||
| ~Dump() = default; | |||
| bool dump_enable() const { return dump_enable_; } | |||
| bool trans_flag() const { return trans_flag_; } | |||
| std::string dump_path() const { return dump_path_; } | |||
| std::string dump_net_name() const { return dump_net_name_; } | |||
| uint32_t dump_iter() const { return dump_iter_; } | |||
| void UpdataCurIter() { cur_iter_++; } | |||
| uint32_t cur_iter() const { return cur_iter_; } | |||
| bool IsKernelNeedDump(const std::string &kernel_name); | |||
| bool SetDumpConfFromJsonFile(); | |||
| static bool DumpToFile(const std::string &filename, const void *data, size_t len); | |||
| protected: | |||
| bool dump_enable_; | |||
| bool trans_flag_; | |||
| std::string dump_path_; | |||
| std::string dump_net_name_; | |||
| uint32_t dump_mode_; | |||
| uint32_t dump_iter_; | |||
| uint32_t cur_iter_; | |||
| std::vector<std::string> dump_kernels_; | |||
| private: | |||
| bool ParseDumpConfig(const std::string &dump_config_file); | |||
| bool IsConfigExist(const nlohmann::json &dumpSettings); | |||
| bool IsConfigValid(const nlohmann::json &dumpSettings); | |||
| }; | |||
| using DumpConfPtr = std::shared_ptr<Dump>; | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_E2E_DUMP_H | |||
| @@ -24,10 +24,8 @@ | |||
| #include <string> | |||
| #include <utility> | |||
| #include "debug/tensor_data.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "ir/dtype.h" | |||
| #ifdef ENABLE_DUMP_E2E | |||
| #include "debug/e2e_dump.h" | |||
| #endif | |||
| namespace mindspore { | |||
| class TensorLoader { | |||
| public: | |||
| @@ -98,7 +96,6 @@ class TensorLoader { | |||
| void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool DumpTensorToFile(std::string tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, std::string addr_format, size_t slot) const { | |||
| @@ -132,12 +129,11 @@ class TensorLoader { | |||
| mindspore::tensor::TensorPtr out_tensor = node->GetTensor(); | |||
| size_t host_size = out_tensor->data().nbytes(); | |||
| ret = mindspore::Dump::DumpToFile(path, out_tensor->data_c(), host_size); | |||
| ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size); | |||
| } | |||
| return ret; | |||
| } | |||
| #endif | |||
| private: | |||
| std::vector<std::shared_ptr<TensorData>> tensor_list; | |||
| @@ -30,9 +30,7 @@ | |||
| #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" | |||
| #include "utils/utils.h" | |||
| #include "common/trans.h" | |||
| #ifdef ENABLE_DUMP_E2E | |||
| #include "debug/e2e_dump.h" | |||
| #endif | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/tensor_load.h" | |||
| #endif | |||
| @@ -622,7 +620,6 @@ AscendDeviceAddress::~AscendDeviceAddress() { | |||
| } | |||
| } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &filepath, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type) const { | |||
| bool ret = false; | |||
| @@ -649,7 +646,7 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file | |||
| MS_LOG(ERROR) << "Copy device mem to host failed"; | |||
| return ret; | |||
| } | |||
| ret = mindspore::Dump::DumpToFile(path, out_tensor->data_c(), host_size); | |||
| ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size); | |||
| } else { | |||
| auto host_tmp = std::vector<uint8_t>(size_); | |||
| auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| @@ -659,12 +656,11 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file | |||
| std::string path = | |||
| filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_ + file_extension; | |||
| MS_LOG(INFO) << "E2E Dump path is " << path; | |||
| ret = mindspore::Dump::DumpToFile(path, host_tmp.data(), size_); | |||
| ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_); | |||
| } | |||
| return ret; | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order, | |||
| @@ -42,10 +42,8 @@ class AscendDeviceAddress : public DeviceAddress { | |||
| bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override; | |||
| bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override; | |||
| DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type) const; | |||
| #endif | |||
| const ShapeVector &host_shape, TypeId host_type) const override; | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, | |||
| @@ -39,6 +39,7 @@ | |||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||
| #include "runtime/device/ascend/ascend_memory_manager.h" | |||
| #include "debug/tensor_load.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "utils/shape_utils.h" | |||
| #ifdef MEM_REUSE_DEBUG | |||
| #include "backend/optimizer/mem_reuse/mem_reuse_checker.h" | |||
| @@ -115,7 +116,7 @@ void AscendKernelRuntime::ClearGraphModelMap() { | |||
| } | |||
| graph_data_dumper_.clear(); | |||
| // tell users which dump kernel name not used | |||
| DataDumpParser::GetInstance().PrintUnusedKernel(); | |||
| DumpJsonParser::GetInstance().PrintUnusedKernel(); | |||
| for (auto &iter : graph_model_map_) { | |||
| MS_LOG(INFO) << "Ge UnloadModel " << iter.first; | |||
| @@ -206,15 +207,8 @@ bool AscendKernelRuntime::Init() { | |||
| return true; | |||
| } | |||
| bool ret = false; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| ret = SetDumpConf(); | |||
| if (!ret) { | |||
| MS_LOG(INFO) << "No dump conf to set!"; | |||
| } | |||
| #endif | |||
| DataDumpParser::GetInstance().ParseDumpConfig(); | |||
| DumpJsonParser::GetInstance().Parse(); | |||
| // Start up profiling before rtSetDevice | |||
| ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); | |||
| if (!ret) { | |||
| @@ -233,124 +227,6 @@ bool AscendKernelRuntime::Init() { | |||
| return ret; | |||
| } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| namespace { | |||
| void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| bool trans_flag = dump_conf->trans_flag(); | |||
| const auto &apply_kernels = graph->execution_order(); | |||
| for (const auto &node : apply_kernels) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto node_name = AnfAlgo::GetCNodeName(node); | |||
| std::string kernel_name = node->fullname_with_scope(); | |||
| if (!dump_conf->IsKernelNeedDump(kernel_name)) { | |||
| continue; | |||
| } | |||
| const std::string strsrc = "/"; | |||
| const std::string strdst = "--"; | |||
| std::string::size_type pos = 0; | |||
| std::string::size_type srclen = strsrc.size(); | |||
| std::string::size_type dstlen = strdst.size(); | |||
| while ((pos = kernel_name.find(strsrc, pos)) != std::string::npos) { | |||
| kernel_name.replace(pos, srclen, strdst); | |||
| pos += dstlen; | |||
| } | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(node); | |||
| for (size_t j = 0; j < output_size; ++j) { | |||
| auto addr = AnfAlgo::GetOutputAddr(node, j); | |||
| ShapeVector int_shapes; | |||
| if (trans_flag) { | |||
| int_shapes = trans::GetRuntimePaddingShape(node, j); | |||
| } else { | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(node, j); | |||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | |||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | |||
| } | |||
| auto type = AnfAlgo::GetOutputInferDataType(node, j); | |||
| auto format = kOpFormat_DEFAULT; | |||
| string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j); | |||
| auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr); | |||
| auto ret = ascend_addr->DumpMemToFile(trans_flag, filepath, format, int_shapes, type); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << filepath | |||
| << ", host_format:" << format << ".!"; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| bool trans_flag = dump_conf->trans_flag(); | |||
| const auto &parameters = graph->inputs(); | |||
| for (auto &item : parameters) { | |||
| if (!item->isa<Parameter>()) { | |||
| continue; | |||
| } | |||
| std::string parameter_name = item->fullname_with_scope(); | |||
| if (!dump_conf->IsKernelNeedDump(parameter_name)) { | |||
| continue; | |||
| } | |||
| auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX); | |||
| ShapeVector int_shapes; | |||
| if (trans_flag) { | |||
| int_shapes = trans::GetRuntimePaddingShape(item, PRAMATER_OUTPUT_INDEX); | |||
| } else { | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX); | |||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | |||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | |||
| } | |||
| auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX); | |||
| auto format = kOpFormat_DEFAULT; | |||
| string filepath = dump_path + '/' + parameter_name + '_' + "output_0"; | |||
| auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr); | |||
| auto ret = ascend_addr->DumpMemToFile(trans_flag, filepath, format, int_shapes, type); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << filepath | |||
| << ", host_format:" << format << ".!"; | |||
| } | |||
| } | |||
| } | |||
| } // namespace | |||
| #endif | |||
| bool AscendKernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| #ifdef ENABLE_DUMP_E2E | |||
| MS_LOG(INFO) << "Start dump step"; | |||
| DumpConfPtr dump_conf = GetDumpConf(); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| dump_conf->UpdataCurIter(); | |||
| bool dump_flag = dump_conf->dump_enable(); | |||
| if (!dump_flag) { | |||
| MS_LOG(INFO) << "Dump flag is disable, pass dump step"; | |||
| return true; | |||
| } | |||
| uint32_t cur_iter = dump_conf->cur_iter(); | |||
| if (dump_conf->dump_iter() != 0) { | |||
| if (cur_iter != dump_conf->dump_iter()) { | |||
| return true; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Cur iter is " << cur_iter; | |||
| std::string net_name = dump_conf->dump_net_name(); | |||
| std::string iterator = to_string(cur_iter); | |||
| std::string dump_path = dump_conf->dump_path(); | |||
| if (dump_path.back() == '/') { | |||
| dump_path = dump_path + net_name + '/' + iterator; | |||
| } else { | |||
| dump_path = dump_path + '/' + net_name + '/' + iterator; | |||
| } | |||
| // dump output | |||
| DumpOutput(graph, dump_path, dump_conf); | |||
| // dump parameters | |||
| DumpParameters(graph, dump_path, dump_conf); | |||
| #endif | |||
| return true; | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| namespace { | |||
| void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { | |||
| @@ -482,6 +358,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { | |||
| MS_EXCEPTION(NotExistsError) << "session::KernelGraph is NULL!"; | |||
| } | |||
| MS_LOG(INFO) << "GenTask start. GraphId:" << graph->graph_id(); | |||
| DumpJsonParser::GetInstance().UpdateNeedDumpKernels(NOT_NULL(graph)); | |||
| #ifdef MEM_REUSE_DEBUG | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| @@ -580,9 +457,10 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { | |||
| void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph, | |||
| NotNull<std::function<void *()>> model_handle) { | |||
| if (!DataDumpParser::GetInstance().DumpEnabled()) { | |||
| if (!DumpJsonParser::GetInstance().async_dump_enabled()) { | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Start Distribute Debug Task"; | |||
| auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle); | |||
| MS_EXCEPTION_IF_NULL(data_dumper); | |||
| auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); | |||
| @@ -593,9 +471,10 @@ void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph | |||
| } | |||
| void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) { | |||
| if (!DataDumpParser::GetInstance().DumpEnabled()) { | |||
| if (!DumpJsonParser::GetInstance().async_dump_enabled()) { | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Start Launch Dump Data"; | |||
| auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id); | |||
| if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) { | |||
| auto &data_dumper = dumper_iter->second; | |||
| @@ -25,7 +25,6 @@ | |||
| #include "framework/ge_runtime/davinci_model.h" | |||
| #include "runtime/device/kernel_runtime_manager.h" | |||
| #include "backend/session/session_basic.h" | |||
| #include "debug/data_dump_parser.h" | |||
| #include "runtime/device/ascend/dump/data_dumper.h" | |||
| using ge::model_runner::TaskInfo; | |||
| @@ -39,7 +38,6 @@ class AscendKernelRuntime : public KernelRuntime { | |||
| AscendKernelRuntime() = default; | |||
| ~AscendKernelRuntime() override; | |||
| bool Init() override; | |||
| bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr) override; | |||
| bool LoadData(session::KernelGraph *graph, Debugger *debugger) override; | |||
| bool GenTask(const session::KernelGraph *graph); | |||
| bool LoadTask(const session::KernelGraph *graph); | |||
| @@ -27,7 +27,7 @@ | |||
| #include "runtime/device/ascend/dump/ge_dump.h" | |||
| #include "proto/op_mapping_info.pb.h" | |||
| #include "utils/ms_context.h" | |||
| #include "debug/data_dump_parser.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/debugger/debugger.h" | |||
| #endif | |||
| @@ -68,6 +68,27 @@ DataDumper::~DataDumper() { | |||
| ReleaseDevMem(&op_debug_dump_args_); | |||
| } | |||
| void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const { | |||
| for (const auto &kernel : kernel_graph_->execution_order()) { | |||
| if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL && | |||
| DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) { | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| for (size_t i = 0; i < input_size; ++i) { | |||
| auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||
| auto input = input_with_index.first; | |||
| if (input->isa<CNode>()) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope() | |||
| << " Input:" << input->fullname_with_scope(); | |||
| kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>()); | |||
| } | |||
| } | |||
| } else if (KernelNeedDump(kernel)) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope(); | |||
| kernel_map->try_emplace(kernel->fullname_with_scope(), kernel); | |||
| } | |||
| } | |||
| } | |||
| void DataDumper::LoadDumpInfo() { | |||
| MS_LOG(INFO) << "[DataDump] LoadDumpInfo start"; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| @@ -83,7 +104,7 @@ void DataDumper::LoadDumpInfo() { | |||
| } | |||
| MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope(); | |||
| dump_kernel_names_.emplace_back(kernel->fullname_with_scope()); | |||
| DataDumpParser::GetInstance().MatchKernel(kernel->fullname_with_scope()); | |||
| DumpJsonParser::GetInstance().MatchKernel(kernel->fullname_with_scope()); | |||
| aicpu::dump::Task task; | |||
| ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); | |||
| @@ -115,16 +136,16 @@ void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_inf | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); | |||
| if (!dump_path.has_value()) { | |||
| auto dump_path = DumpJsonParser::GetInstance().path(); | |||
| if (dump_path.empty()) { | |||
| MS_LOG(EXCEPTION) << "Dump path invalid"; | |||
| } | |||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| dump_info->set_dump_path("/" + dump_path.value() + "_" + std::to_string(device_id) + "/"); | |||
| MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); | |||
| dump_info->set_dump_path("/" + dump_path + "_" + std::to_string(device_id) + "/"); | |||
| MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path; | |||
| dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); | |||
| dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); | |||
| dump_info->set_model_name(DumpJsonParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); | |||
| dump_info->set_dump_step(std::to_string(DumpJsonParser::GetInstance().iteration())); | |||
| dump_info->set_model_id(kernel_graph_->graph_id()); | |||
| dump_info->set_flag(kAicpuLoadFlag); | |||
| @@ -164,7 +185,7 @@ bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| // dump all kernel if mode is set 0 in data_dump.json | |||
| return DataDumpParser::GetInstance().NeedDump(kernel->fullname_with_scope()); | |||
| return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope()); | |||
| } | |||
| void DataDumper::UnloadDumpInfo() { | |||
| @@ -258,7 +279,7 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo | |||
| } | |||
| void DataDumper::OpDebugRegister() { | |||
| uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode(); | |||
| uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode(); | |||
| auto iter = kOverflowModeStr.find(op_debug_mode); | |||
| if (iter == kOverflowModeStr.end()) { | |||
| MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode; | |||
| @@ -294,7 +315,7 @@ void DataDumper::OpDebugRegister() { | |||
| } | |||
| void DataDumper::OpDebugUnregister() { | |||
| uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode(); | |||
| uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode(); | |||
| if (op_debug_mode == kNoOverflow) { | |||
| MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister."; | |||
| return; | |||
| @@ -337,6 +358,10 @@ void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { | |||
| } | |||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||
| if (!DumpJsonParser::GetInstance().OutputNeedDump()) { | |||
| MS_LOG(INFO) << "Skip dump output"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | |||
| @@ -367,6 +392,10 @@ void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::T | |||
| } | |||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||
| if (!DumpJsonParser::GetInstance().InputNeedDump()) { | |||
| MS_LOG(INFO) << "Skip dump input"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope(); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| uint64_t offset = 0; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| #include <tuple> | |||
| #include <map> | |||
| #include <set> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| @@ -63,6 +64,7 @@ class DataDumper { | |||
| void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const; | |||
| void SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo *> dump_info) const; | |||
| void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const; | |||
| void GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const; | |||
| std::function<void *()> model_handle_; | |||
| uint32_t debug_task_id_; | |||
| @@ -66,6 +66,10 @@ class DeviceAddress : public mindspore::DeviceSync { | |||
| virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; } | |||
| virtual DeviceAddressType DeviceType() const { return DeviceAddressType::kUnknown; } | |||
| void *GetMutablePtr() const override { return ptr_; } | |||
| virtual bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type) const { | |||
| return true; | |||
| } | |||
| protected: | |||
| const void *ptr() const { return ptr_; } | |||
| @@ -33,6 +33,7 @@ | |||
| #include "ir/dtype.h" | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #include "utils/shape_utils.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/debug_services.h" | |||
| #endif | |||
| @@ -51,19 +52,12 @@ bool GPUKernelRuntime::Init() { | |||
| GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory(); | |||
| return true; | |||
| } | |||
| bool ret = false; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| ret = SetDumpConf(); | |||
| if (!ret) { | |||
| MS_LOG(INFO) << "No dump conf to set!"; | |||
| } | |||
| #endif | |||
| ret = InitDevice(); | |||
| bool ret = InitDevice(); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "InitDevice error."; | |||
| return ret; | |||
| } | |||
| DumpJsonParser::GetInstance().Parse(); | |||
| mem_manager_ = std::make_shared<GPUMemoryManager>(); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->MallocDeviceMemory(); | |||
| @@ -79,146 +73,6 @@ bool GPUKernelRuntime::Init() { | |||
| return ret; | |||
| } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| namespace { | |||
| void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf, | |||
| Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| bool trans_flag = dump_conf->trans_flag(); | |||
| const auto &apply_kernels = graph->execution_order(); | |||
| for (const auto &node : apply_kernels) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto node_name = AnfAlgo::GetCNodeName(node); | |||
| std::string kernel_name = node->fullname_with_scope(); | |||
| if (!dump_conf->IsKernelNeedDump(kernel_name)) { | |||
| continue; | |||
| } | |||
| const std::string strsrc = "/"; | |||
| const std::string strdst = "--"; | |||
| std::string::size_type pos = 0; | |||
| std::string::size_type srclen = strsrc.size(); | |||
| std::string::size_type dstlen = strdst.size(); | |||
| while ((pos = kernel_name.find(strsrc, pos)) != std::string::npos) { | |||
| kernel_name.replace(pos, srclen, strdst); | |||
| pos += dstlen; | |||
| } | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(node); | |||
| for (size_t j = 0; j < output_size; ++j) { | |||
| auto addr = AnfAlgo::GetOutputAddr(node, j); | |||
| TypeId addr_type_id = addr->type_id(); | |||
| std::string addr_format = addr->format(); | |||
| ShapeVector int_shapes; | |||
| if (trans_flag) { | |||
| int_shapes = trans::GetRuntimePaddingShape(node, j); | |||
| } else { | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(node, j); | |||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | |||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | |||
| } | |||
| auto type = AnfAlgo::GetOutputInferDataType(node, j); | |||
| auto format = kOpFormat_DEFAULT; | |||
| string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j); | |||
| DebugServices *debug_services = debugger->debug_services(); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| std::string original_kernel_name = node->fullname_with_scope(); | |||
| size_t slot = j; | |||
| auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, filepath, format, int_shapes, type, | |||
| addr_type_id, addr_format, slot); | |||
| if (!ret) { | |||
| std::string error = "DumpTensorToFile Failed: flag:" + std::to_string(trans_flag) + ", path:" + filepath + | |||
| ", host_format:" + format + ".!"; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf, | |||
| Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| bool trans_flag = dump_conf->trans_flag(); | |||
| const auto &parameters = graph->inputs(); | |||
| for (auto &item : parameters) { | |||
| if (!item->isa<Parameter>()) { | |||
| continue; | |||
| } | |||
| std::string parameter_name = item->fullname_with_scope(); | |||
| if (!dump_conf->IsKernelNeedDump(parameter_name)) { | |||
| continue; | |||
| } | |||
| auto addr = AnfAlgo::GetOutputAddr(item, PARAMETER_OUTPUT_INDEX); | |||
| TypeId addr_type_id = addr->type_id(); | |||
| std::string addr_format = addr->format(); | |||
| ShapeVector int_shapes; | |||
| if (trans_flag) { | |||
| int_shapes = trans::GetRuntimePaddingShape(item, PARAMETER_OUTPUT_INDEX); | |||
| } else { | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(item, PARAMETER_OUTPUT_INDEX); | |||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | |||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | |||
| } | |||
| auto type = AnfAlgo::GetOutputInferDataType(item, PARAMETER_OUTPUT_INDEX); | |||
| auto format = kOpFormat_DEFAULT; | |||
| string filepath = dump_path + '/' + parameter_name + '_' + "output_0"; | |||
| DebugServices *debug_services = debugger->debug_services(); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| std::string original_kernel_name = parameter_name; | |||
| size_t slot = 0; | |||
| auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, filepath, format, int_shapes, type, | |||
| addr_type_id, addr_format, slot); | |||
| if (!ret) { | |||
| std::string error = "DumpTensorToFile Failed: flag:" + std::to_string(trans_flag) + ", path:" + filepath + | |||
| ", host_format:" + format + ".!"; | |||
| } | |||
| } | |||
| } | |||
| } // namespace | |||
| bool GPUKernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *debugger) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_LOG(INFO) << "Start dump step"; | |||
| DumpConfPtr dump_conf = GetDumpConf(); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| dump_conf->UpdataCurIter(); | |||
| bool dump_flag = dump_conf->dump_enable(); | |||
| if (!dump_flag) { | |||
| MS_LOG(INFO) << "Dump flag is disable, pass dump step"; | |||
| return true; | |||
| } | |||
| uint32_t cur_iter = dump_conf->cur_iter(); | |||
| if (dump_conf->dump_iter() != 0) { | |||
| if (cur_iter != dump_conf->dump_iter()) { | |||
| return true; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Cur iter is " << cur_iter; | |||
| std::string net_name = dump_conf->dump_net_name(); | |||
| std::string iterator = std::to_string(cur_iter); | |||
| std::string dump_path = dump_conf->dump_path(); | |||
| if (dump_path.back() == '/') { | |||
| dump_path = dump_path + net_name + '/' + iterator; | |||
| } else { | |||
| dump_path = dump_path + '/' + net_name + '/' + iterator; | |||
| } | |||
| // dump output | |||
| DumpOutput(graph, dump_path, dump_conf, debugger); | |||
| // dump parameters | |||
| DumpParameters(graph, dump_path, dump_conf, debugger); | |||
| return true; | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DEBUGGER | |||
| namespace { | |||
| void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | |||
| @@ -43,9 +43,6 @@ class GPUKernelRuntime : public KernelRuntime { | |||
| const std::vector<CNodePtr> &execution_order) override; | |||
| void AssignMemory(session::KernelGraph *graph) override; | |||
| bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr) override; | |||
| #endif | |||
| protected: | |||
| DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | |||
| @@ -27,6 +27,7 @@ | |||
| #include "backend/session/kernel_graph.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "ir/value.h" | |||
| #include "utils/shape_utils.h" | |||
| using mindspore::kernel::Address; | |||
| @@ -34,21 +35,10 @@ using mindspore::kernel::AddressPtr; | |||
| namespace mindspore { | |||
| namespace device { | |||
| KernelRuntime::~KernelRuntime() { | |||
| #ifdef ENABLE_DUMP_E2E | |||
| dump_conf_ptr_ = nullptr; | |||
| #endif | |||
| } | |||
| KernelRuntime::~KernelRuntime() {} | |||
| bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; } | |||
| bool KernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *debugger) { | |||
| if (graph != nullptr) { | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| bool KernelRuntime::LoadData(session::KernelGraph *graph, Debugger *debugger) { return false; } | |||
| bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) { | |||
| @@ -134,36 +124,21 @@ void KernelRuntime::RunOpClearMemory(const session::KernelGraph *graph) { | |||
| } | |||
| bool KernelRuntime::DumpDataEnabled() { | |||
| bool ret = false; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| DumpConfPtr dump_conf = GetDumpConf(); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| bool dump_flag = dump_conf->dump_enable(); | |||
| if (!dump_flag) { | |||
| return ret; | |||
| } | |||
| ret = true; | |||
| #endif | |||
| return ret; | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| return dump_json_parser.e2e_dump_enabled(); | |||
| } | |||
| bool KernelRuntime::DumpDataEnabledIteration() { | |||
| bool ret = false; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| if (!DumpDataEnabled()) { | |||
| return ret; | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| if (!dump_json_parser.e2e_dump_enabled()) { | |||
| return false; | |||
| } | |||
| DumpConfPtr dump_conf = GetDumpConf(); | |||
| MS_EXCEPTION_IF_NULL(dump_conf); | |||
| uint32_t cur_iter = dump_conf->cur_iter() + 1; | |||
| if (dump_conf->dump_iter() != 0) { | |||
| if (cur_iter != dump_conf->dump_iter()) { | |||
| return ret; | |||
| } | |||
| auto cur_iter = dump_json_parser.cur_dump_iter() + 1; | |||
| if (dump_json_parser.iteration() != 0) { | |||
| return cur_iter == dump_json_parser.iteration(); | |||
| } | |||
| ret = true; | |||
| #endif | |||
| return ret; | |||
| return true; | |||
| } | |||
| void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { | |||
| @@ -858,16 +833,5 @@ DeviceAddressPtr KernelRuntime::AssignSingleOpLaunchMemory(size_t size, const st | |||
| MS_EXCEPTION_IF_NULL(base_ptr); | |||
| return device_address; | |||
| } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool KernelRuntime::SetDumpConf() { | |||
| dump_conf_ptr_ = std::make_shared<Dump>(); | |||
| MS_EXCEPTION_IF_NULL(dump_conf_ptr_); | |||
| bool ret = dump_conf_ptr_->SetDumpConfFromJsonFile(); | |||
| return ret; | |||
| } | |||
| DumpConfPtr KernelRuntime::GetDumpConf() { return dump_conf_ptr_; } | |||
| #endif | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -24,9 +24,6 @@ | |||
| #include "runtime/device/device_address.h" | |||
| #include "ir/tensor.h" | |||
| #include "utils/convert_utils.h" | |||
| #ifdef ENABLE_DUMP_E2E | |||
| #include "debug/e2e_dump.h" | |||
| #endif | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/debugger/debugger.h" | |||
| #endif | |||
| @@ -58,7 +55,6 @@ class KernelRuntime { | |||
| void RunOpClearMemory(const session::KernelGraph *graph); | |||
| bool DumpDataEnabled(); | |||
| bool DumpDataEnabledIteration(); | |||
| virtual bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr); | |||
| virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger); | |||
| virtual bool Load(session::KernelGraph *graph, bool is_task_sink); | |||
| virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0; | |||
| @@ -77,9 +73,6 @@ class KernelRuntime { | |||
| virtual bool SyncStream() = 0; | |||
| // Default implementation is a no-op. Presumably overridden by runtimes that | |||
| // keep globally idle memory to release -- confirm against subclasses. | |||
| virtual void ClearGlobalIdleMem() {} | |||
| #ifdef ENABLE_DUMP_E2E | |||
| DumpConfPtr GetDumpConf(); | |||
| #endif | |||
| // Device resource release hook, intended to be implemented by the GPU and D | |||
| // runtimes. The base-class version does nothing. | |||
| virtual void ReleaseDeviceRes() {} | |||
| // Stores the given device id in device_id_. | |||
| void set_device_id(uint32_t device_id) { device_id_ = device_id; } | |||
| @@ -101,9 +94,6 @@ class KernelRuntime { | |||
| void AssignCommunicationNodeOutputMem(MemType type, const AnfNodePtr &node); | |||
| void AssignCommunicationNodeInputMem(MemType type, const AnfNodePtr &node); | |||
| void AssignCommunicationNodeMem(MemType type, const AnfNodePtr &node); | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool SetDumpConf(); | |||
| #endif | |||
| private: | |||
| void AssignStaticMemoryOutput(session::KernelGraph *graph); | |||
| @@ -121,10 +111,6 @@ class KernelRuntime { | |||
| protected: | |||
| uint32_t device_id_{0}; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| DumpConfPtr dump_conf_ptr_; | |||
| #endif | |||
| #ifdef ENABLE_DEBUGGER | |||
| Debugger *debugger_; | |||
| #endif | |||
| @@ -4,7 +4,6 @@ message("build ut testcases...") | |||
| project(ut) | |||
| set(PROJECT_DIR "${PROJECT_SOURCE_DIR}/../../..") | |||
| add_compile_definitions(ENABLE_DUMP_E2E) | |||
| if(ENABLE_DUMP_IR) | |||
| add_compile_definitions(ENABLE_DUMP_IR) | |||
| endif(ENABLE_DUMP_IR) | |||
| @@ -84,9 +83,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "../../../mindspore/ccsrc/frontend/parallel/*.cc" | |||
| "../../../mindspore/ccsrc/frontend/operator/*.cc" | |||
| # don't remove the 4 lines above | |||
| "../../../mindspore/ccsrc/debug/e2e_dump.cc" | |||
| "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc" | |||
| "../../../mindspore/ccsrc/debug/common.cc" | |||
| "../../../mindspore/ccsrc/debug/data_dump_parser.cc" | |||
| "../../../mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc" | |||
| "../../../mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc" | |||
| "../../../mindspore/ccsrc/runtime/device/kernel_runtime.cc" | |||
| @@ -20,7 +20,7 @@ | |||
| #include "utils/system/file_system.h" | |||
| #include "utils/system/env.h" | |||
| #define private public | |||
| #include "debug/e2e_dump.h" | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #undef private | |||
| namespace mindspore { | |||
| @@ -38,7 +38,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) { | |||
| int ret; | |||
| char filename[] = "/tmp/dumpToFileTestFile"; | |||
| ret = mindspore::Dump::DumpToFile(filename, data, len * sizeof(int)); | |||
| ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int)); | |||
| ASSERT_EQ(ret, true); | |||
| int fd = open(filename, O_RDONLY); | |||
| @@ -70,7 +70,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileRelativePath) { | |||
| int ret; | |||
| char filename[] = "../../dumpToFileTestFile"; | |||
| ret = mindspore::Dump::DumpToFile(filename, data, len * sizeof(int)); | |||
| ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int)); | |||
| ASSERT_EQ(ret, true); | |||
| int fd = open(filename, O_RDONLY); | |||
| @@ -102,7 +102,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileNotExistDir) { | |||
| } | |||
| char filename[] = "./tmp/dumpToFileTestFile"; | |||
| int ret = mindspore::Dump::DumpToFile(filename, data, len * sizeof(int)); | |||
| int ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int)); | |||
| ASSERT_EQ(ret, true); | |||
| int fd = open(filename, O_RDONLY); | |||