@@ -56,7 +56,7 @@ usage()
  echo " -K Compile with AKG, default on"
  echo " -s Enable serving module, default off"
  echo " -w Enable acl module, default off"
  echo " -B Enable debugger, default off"
  echo " -B Enable debugger, default on"
  echo " -E Enable IBVERBS for parameter server, default off"
  echo " -l Compile with python dependency, default on"
}
@@ -102,7 +102,7 @@ checkopts()
  ENABLE_AKG="on"
  ENABLE_SERVING="off"
  ENABLE_ACL="off"
  ENABLE_DEBUGGER="off"
  ENABLE_DEBUGGER="on"
  ENABLE_IBVERBS="off"
  ENABLE_PYTHON="on"
  ENABLE_GPU="off"
@@ -282,8 +282,7 @@ checkopts()
      ;;
    B)
      check_on_off $OPTARG B
      ENABLE_DEBUGGER="on"
      echo "enable debugger"
      ENABLE_DEBUGGER="$OPTARG"
      ;;
    E)
      ENABLE_IBVERBS="on"
@@ -16,9 +16,6 @@
#include "backend/kernel_compiler/cpu/debug_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif
namespace mindspore {
namespace kernel {
@@ -39,11 +36,6 @@ bool DebugCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
    output[i] = val[i];
  }
#ifdef ENABLE_DEBUGGER
  // debugger will suspend execution if necessary
  Debugger::GetInstance()->PostDebugOp();
#endif
  return true;
}
} // namespace kernel
@@ -80,11 +80,13 @@ bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr
  MS_EXCEPTION_IF_NULL(kernel_prev);
#ifdef ENABLE_DEBUGGER
  auto debugger_ = mindspore::Debugger::GetInstance();
  DebugServices *debug_services = debugger_->debug_services();
  auto watchpoint_table = debug_services->GetWatchpointTable();
  std::string current_kernel_name = kernel_curr->scope_full_name();
  if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) {
    return false;
  if (debugger_->DebuggerBackendEnabled()) {
    DebugServices *debug_services = debugger_->debug_services();
    auto watchpoint_table = debug_services->GetWatchpointTable();
    std::string current_kernel_name = kernel_curr->scope_full_name();
    if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) {
      return false;
    }
  }
#endif
  auto curr_stream_id = kernel_curr->stream_id();
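Since ENABLE_DEBUGGER is now compiled in by default, call sites that touch `debug_services()` are wrapped in a runtime `DebuggerBackendEnabled()` check, because the services object only exists once the debugger or debugger-based dump is actually turned on. Below is a standalone sketch of that guard pattern using mock types (not the MindSpore classes), just to show why the check must come before the dereference:

```cpp
// Standalone sketch (mock types, not MindSpore code) of the runtime-guard pattern:
// when the debugger is always compiled in, its services may be null until a backend
// feature is enabled, so callers must check before dereferencing.
#include <iostream>
#include <memory>
#include <string>

class MockDebugServices {
 public:
  bool IsWatchPoint(const std::string &name) const { return name == "Conv2D-op1"; }
};

class MockDebugger {
 public:
  // Backend features (debugger or debugger-based dump) are off by default.
  bool DebuggerBackendEnabled() const { return backend_enabled_; }
  void Enable() {
    backend_enabled_ = true;
    services_ = std::make_unique<MockDebugServices>();
  }
  MockDebugServices *debug_services() const { return services_.get(); }

 private:
  bool backend_enabled_ = false;
  std::unique_ptr<MockDebugServices> services_;  // stays null while disabled
};

bool IsUsable(const MockDebugger &debugger, const std::string &kernel_name) {
  // Guard first: debug_services() is null when the backend is disabled.
  if (debugger.DebuggerBackendEnabled()) {
    if (debugger.debug_services()->IsWatchPoint(kernel_name)) {
      return false;  // watched buffers must not be reused
    }
  }
  return true;
}

int main() {
  MockDebugger debugger;
  std::cout << IsUsable(debugger, "Conv2D-op1") << "\n";  // 1: backend off, reuse allowed
  debugger.Enable();
  std::cout << IsUsable(debugger, "Conv2D-op1") << "\n";  // 0: watched node, reuse blocked
}
```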
@@ -605,16 +605,18 @@ void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph)
  MS_LOG(INFO) << "Start!";
  MS_EXCEPTION_IF_NULL(kernel_graph);
#ifdef ENABLE_DEBUGGER
  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
  MS_EXCEPTION_IF_NULL(runtime_instance);
  DebugServices *debug_services = debugger_->debug_services();
  TensorLoader *tensor_loader = debug_services->tensor_loader();
  // TensorData will be freed up here
  tensor_loader->EmptyTensor();
  uint32_t iter_num = tensor_loader->GetIterNum();
  tensor_loader->set_iter_num(++iter_num);
  (void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get());
  tensor_loader->EmptyPrevTensor();
  if (debugger_->DebuggerBackendEnabled()) {
    auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
    MS_EXCEPTION_IF_NULL(runtime_instance);
    DebugServices *debug_services = debugger_->debug_services();
    TensorLoader *tensor_loader = debug_services->tensor_loader();
    // TensorData will be freed up here
    tensor_loader->EmptyTensor();
    uint32_t iter_num = tensor_loader->GetIterNum();
    tensor_loader->set_iter_num(++iter_num);
    (void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get());
    tensor_loader->EmptyPrevTensor();
  }
#endif
  MS_LOG(INFO) << "Finish!";
}
@@ -26,9 +26,6 @@
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/common/pass_manager.h"
#include "backend/optimizer/pass/replace_node_by_proxy.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "frontend/parallel/ps/util.h"
#endif
@@ -112,12 +109,7 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
    summary_outputs = kernel_graph->summary_nodes();
    runtime_.IncreaseSummaryRefCount(summary_outputs);
  }
#ifdef ENABLE_DEBUGGER
  // debugger pre-execution processing
  if (debugger_) {
    debugger_->PreExecute(kernel_graph);
  }
#endif
  bool ret = runtime_.Run(kernel_graph.get(), false);
  if (!ret) {
    MS_LOG(EXCEPTION) << "Run graph failed";
@@ -128,12 +120,6 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
    runtime_.DecreaseSummaryRefCount(summary_outputs);
  }
#ifdef ENABLE_DEBUGGER
  // debugger post-execution processing
  if (debugger_) {
    debugger_->PostExecute();
  }
#endif
  MS_LOG(INFO) << "Run graph end";
}
@@ -351,10 +351,12 @@ void GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info
#ifdef ENABLE_DEBUGGER
void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
#ifdef ENABLE_DUMP_E2E
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
  MS_EXCEPTION_IF_NULL(runtime_instance);
  (void)runtime_instance->DumpData(kernel_graph.get(), debugger_.get());
  if (debugger_->DebuggerBackendEnabled()) {
    MS_EXCEPTION_IF_NULL(kernel_graph);
    auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
    MS_EXCEPTION_IF_NULL(runtime_instance);
    (void)runtime_instance->DumpData(kernel_graph.get(), debugger_.get());
  }
#endif
}
@@ -80,25 +80,16 @@ void Debugger::EnableDebugger() {
  grpc_client_ = nullptr;
  debug_services_ = nullptr;
  // see if dump is enabled
  bool dump_enabled = false;
  if (device_target_ == kGPUDevice) {
    auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
    MS_EXCEPTION_IF_NULL(runtime_instance);
    dump_enabled = runtime_instance->DumpDataEnabled();
  }
  // see if dump using debugger backend is enabled
  bool dump_enabled = CheckDebuggerDumpEnabled();
  MS_LOG(INFO) << "dump using debugger backend = " << dump_enabled;
  // get env variables to configure debugger
  const char *env_enable_str = std::getenv("ENABLE_MS_DEBUGGER");
  if (env_enable_str != nullptr) {
    MS_LOG(INFO) << "Getenv ENABLE_MS_DEBUGGER: " << env_enable_str;
    if (std::strcmp(env_enable_str, "1") == 0) {
      debugger_enabled_ = true;
    }
  }
  // check if debugger enabled
  debugger_enabled_ = CheckDebuggerEnabled();
  MS_LOG(INFO) << "debugger_enabled_ = " << debugger_enabled_;
  if (!debugger_enabled_ && !dump_enabled) {
    MS_LOG(WARNING) << "Not enabling debugger. Set environment variable ENABLE_MS_DEBUGGER=1 to enable debugger.";
    MS_LOG(INFO) << "Not enabling debugger. Set environment variable ENABLE_MS_DEBUGGER=1 to enable debugger.";
    return;
  }
@@ -109,7 +100,7 @@ void Debugger::EnableDebugger() {
    MS_LOG(INFO) << "Getenv MS_DEBUGGER_HOST: " << env_host_str;
    host = std::string(env_host_str);
  } else {
    MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost";
    MS_LOG(INFO) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost";
    host = "localhost";
  }
  // configure grpc port
@@ -119,7 +110,7 @@ void Debugger::EnableDebugger() {
    MS_LOG(INFO) << "Getenv MS_DEBUGGER_PORT: " << env_port_str;
    port = std::string(env_port_str);
  } else {
    MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_PORT doesn't exist. Using default debugger port: 50051";
    MS_LOG(INFO) << "Environment variable MS_DEBUGGER_PORT doesn't exist. Using default debugger port: 50051";
    port = "50051";
  }
@@ -140,8 +131,8 @@ void Debugger::EnableDebugger() {
    MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first "
                       "step. 2. Tensor values are only available for nodes that are watched by any watchpoint.";
  } else {
    MS_LOG(WARNING) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
                       "usage for large models.";
    MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
                    "usage for large models.";
  }
#ifdef ENABLE_D
  // set operation overflow info
@@ -180,6 +171,29 @@ void Debugger::EnableDebugger() {
  debug_services_ = std::make_unique<DebugServices>();
}

bool Debugger::CheckDebuggerDumpEnabled() {
  // see if dump is enabled
  if (device_target_ == kGPUDevice) {
    auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
    MS_EXCEPTION_IF_NULL(runtime_instance);
    return runtime_instance->DumpDataEnabled();
  }
  return false;
}

bool Debugger::CheckDebuggerEnabled() {
  // get env variables to configure debugger
  const char *env_enable_str = std::getenv("ENABLE_MS_DEBUGGER");
  if (env_enable_str != nullptr) {
    if (std::strcmp(env_enable_str, "1") == 0) {
      return true;
    }
  }
  return false;
}

bool Debugger::DebuggerBackendEnabled() { return CheckDebuggerDumpEnabled() || CheckDebuggerEnabled(); }

void Debugger::Reset() {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
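CheckDebuggerEnabled above is a plain environment-variable probe: only the literal value "1" in ENABLE_MS_DEBUGGER counts as enabled. A standalone sketch of the same convention follows (the helper name and the second flag are illustrative, and `setenv` assumes a POSIX system):

```cpp
// Standalone sketch of the ENABLE_MS_DEBUGGER-style check; not the MindSpore
// implementation itself, just the same "set to 1" convention.
#include <cstdlib>
#include <cstring>
#include <iostream>

bool CheckEnvFlag(const char *name) {
  const char *value = std::getenv(name);
  // Only the literal string "1" turns the feature on; unset or any other value is off.
  return value != nullptr && std::strcmp(value, "1") == 0;
}

int main() {
  setenv("ENABLE_MS_DEBUGGER", "1", 1);  // e.g. `export ENABLE_MS_DEBUGGER=1` before launching training
  std::cout << "debugger enabled: " << CheckEnvFlag("ENABLE_MS_DEBUGGER") << "\n";  // 1
  std::cout << "other flag: " << CheckEnvFlag("SOME_OTHER_FLAG") << "\n";           // 0
}
```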
@@ -201,25 +215,29 @@ void Debugger::Reset() {
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
  // check and save graph_ptr, suspend if graph is new
  CheckGraphPtr(graph_ptr);
  if (debugger_->DebuggerBackendEnabled()) {
    // check and save graph_ptr, suspend if graph is new
    CheckGraphPtr(graph_ptr);
  }
}

void Debugger::PostExecute() {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
  // analyze tensor data and send the watchpoints that have been hit
  if (run_level_ == "node") {
    MS_LOG(INFO) << "Debugger is in node level mode ";
    return;
  }
  if (debugger_enabled_ && !is_dataset_graph_) {
    if (device_target_ != kGPUDevice) {
      num_step_++;
      MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
      SendWatchpointsAndSuspend(CheckWatchpoints());
    } else {
      CommandLoop();
  if (debugger_->DebuggerBackendEnabled()) {
    // analyze tensor data and send the watchpoints that have been hit
    if (run_level_ == "node") {
      MS_LOG(INFO) << "Debugger is in node level mode ";
      return;
    }
    if (debugger_enabled_ && !is_dataset_graph_) {
      if (device_target_ != kGPUDevice) {
        num_step_++;
        MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
        SendWatchpointsAndSuspend(CheckWatchpoints());
      } else {
        CommandLoop();
      }
    }
  }
}
@@ -302,8 +320,8 @@ void Debugger::CheckDatasetGraph() {
    auto node_name = AnfAlgo::GetCNodeName(node);
    MS_LOG(INFO) << "node: " << node->fullname_with_scope();
    if (node_name == "GetNext" || node_name == "InitDataSetQueue") {
      MS_LOG(WARNING) << "Not enabling debugger for graph " << graph_ptr_->graph_id() << ": found dataset graph node "
                      << node_name;
      MS_LOG(INFO) << "Not enabling debugger for graph " << graph_ptr_->graph_id() << ": found dataset graph node "
                   << node_name;
      is_dataset_graph_ = true;
      return;
    }
@@ -96,6 +96,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  std::map<std::pair<uint32_t, uint32_t>, std::string> &GetStreamTaskToOpnameMap();

  // check if any feature that uses the debugger backend is enabled
  bool DebuggerBackendEnabled();

 private:
  // private constructor for singleton
  Debugger();
@@ -105,6 +108,12 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  // read env variable for grpc client
  void EnableDebugger();

  // check if dump using debugger backend is enabled
  bool CheckDebuggerDumpEnabled();

  // check if debugger enabled
  bool CheckDebuggerEnabled();

  // check and save graph pointer
  void CheckGraphPtr(const KernelGraphPtr &graph_ptr);
@@ -40,7 +40,7 @@ class AscendKernelRuntime : public KernelRuntime {
  ~AscendKernelRuntime() override;
  bool Init() override;
  bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr) override;
  bool LoadData(session::KernelGraph *graph, Debugger *debugger);
  bool LoadData(session::KernelGraph *graph, Debugger *debugger) override;
  bool GenTask(const session::KernelGraph *graph);
  bool LoadTask(const session::KernelGraph *graph);
  bool RunTask(const session::KernelGraph *graph);
@@ -97,14 +97,16 @@ void DataDumper::LoadDumpInfo() {
#ifdef ENABLE_DEBUGGER
  auto debugger = mindspore::Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  std::map<std::pair<uint32_t, uint32_t>, std::string> &stream_task_to_opname = debugger->GetStreamTaskToOpnameMap();
  // extract stream id, task id and opname from runtime_info_map for overflow detection
  std::transform(runtime_info_map_.begin(), runtime_info_map_.end(),
                 std::inserter(stream_task_to_opname, stream_task_to_opname.end()),
                 [](const std::pair<std::string, std::shared_ptr<RuntimeInfo>> &p)
                   -> std::pair<std::pair<uint32_t, uint32_t>, std::string> {
                   return {{std::get<1>(*p.second), std::get<0>(*p.second)}, p.first};
                 });
  if (debugger->DebuggerBackendEnabled()) {
    std::map<std::pair<uint32_t, uint32_t>, std::string> &stream_task_to_opname = debugger->GetStreamTaskToOpnameMap();
    // extract stream id, task id and opname from runtime_info_map for overflow detection
    std::transform(runtime_info_map_.begin(), runtime_info_map_.end(),
                   std::inserter(stream_task_to_opname, stream_task_to_opname.end()),
                   [](const std::pair<std::string, std::shared_ptr<RuntimeInfo>> &p)
                     -> std::pair<std::pair<uint32_t, uint32_t>, std::string> {
                     return {{std::get<1>(*p.second), std::get<0>(*p.second)}, p.first};
                   });
  }
#endif
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
}
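The guarded `std::transform` above builds a (stream_id, task_id) to op-name lookup from `runtime_info_map_` for overflow detection, pulling the two ids out of each RuntimeInfo with `std::get`. Below is a simplified standalone version with mock types; the tuple layout used here is an assumption for illustration, not the real RuntimeInfo definition:

```cpp
// Simplified, standalone illustration of the std::transform in LoadDumpInfo:
// build a (stream_id, task_id) -> op name map from a name -> RuntimeInfo map.
// RuntimeInfo is reduced to a tuple<task_id, stream_id> here (assumed layout).
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>

using RuntimeInfo = std::tuple<uint32_t /*task_id*/, uint32_t /*stream_id*/>;

int main() {
  std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map = {
      {"Conv2D-op1", std::make_shared<RuntimeInfo>(7, 0)},
      {"MatMul-op2", std::make_shared<RuntimeInfo>(8, 1)},
  };

  std::map<std::pair<uint32_t, uint32_t>, std::string> stream_task_to_opname;
  // Same shape as the call above: key = {stream_id, task_id}, value = op name.
  std::transform(runtime_info_map.begin(), runtime_info_map.end(),
                 std::inserter(stream_task_to_opname, stream_task_to_opname.end()),
                 [](const std::pair<const std::string, std::shared_ptr<RuntimeInfo>> &p)
                     -> std::pair<std::pair<uint32_t, uint32_t>, std::string> {
                   return {{std::get<1>(*p.second), std::get<0>(*p.second)}, p.first};
                 });

  for (const auto &entry : stream_task_to_opname) {
    std::cout << "stream " << entry.first.first << ", task " << entry.first.second
              << " -> " << entry.second << "\n";
  }
}
```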
@@ -49,6 +49,8 @@ bool KernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *d
  return false;
}

bool KernelRuntime::LoadData(session::KernelGraph *graph, Debugger *debugger) { return false; }

bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
  MS_EXCEPTION_IF_NULL(kernel);
  if (AnfAlgo::OutputAddrExist(kernel, index)) {
@@ -59,6 +59,7 @@ class KernelRuntime {
  bool DumpDataEnabled();
  bool DumpDataEnabledIteration();
  virtual bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr);
  virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger);
  virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
  virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0;
  bool LaunchKernel(const session::KernelGraph *graph);
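`LoadData` is now a virtual on the base `KernelRuntime` with a default stub that returns false (see the .cc hunk above), so only backends that can actually load debug tensors override it, and the `override` keyword added to `AscendKernelRuntime` lets the compiler verify the signature. A standalone sketch of that default-stub/override pattern with mock types:

```cpp
// Standalone sketch of the virtual-with-default-stub pattern introduced for LoadData:
// the base runtime reports "nothing loaded"; only backends that support it override.
// KernelGraph/Debugger are mocks, not the MindSpore classes.
#include <iostream>

struct KernelGraph {};
struct Debugger {};

class KernelRuntime {
 public:
  virtual ~KernelRuntime() = default;
  // Default: this backend cannot load debug tensors.
  virtual bool LoadData(KernelGraph *graph, Debugger *debugger) { return false; }
};

class AscendKernelRuntime : public KernelRuntime {
 public:
  // `override` makes the compiler check that the signature matches the base declaration.
  bool LoadData(KernelGraph *graph, Debugger *debugger) override {
    // ... a real backend would copy kernel outputs into the tensor loader here ...
    return true;
  }
};

int main() {
  KernelGraph graph;
  Debugger debugger;
  AscendKernelRuntime ascend;
  KernelRuntime generic;
  KernelRuntime *runtime = &ascend;
  std::cout << runtime->LoadData(&graph, &debugger) << "\n";  // 1: override ran
  runtime = &generic;
  std::cout << runtime->LoadData(&graph, &debugger) << "\n";  // 0: base stub
}
```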
@@ -53,11 +53,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
  set_param<bool>(MS_CTX_ENABLE_TASK_SINK, true);
  set_param<bool>(MS_CTX_IR_FUSION_FLAG, true);
  set_param<bool>(MS_CTX_ENABLE_HCCL, false);
#ifdef ENABLE_DEBUGGER
  set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, false);
#else
  set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, true);
#endif
  set_param<bool>(MS_CTX_ENABLE_GPU_SUMMARY, true);
  set_param<bool>(MS_CTX_PRECOMPILE_ONLY, false);
  set_param<bool>(MS_CTX_ENABLE_AUTO_MIXED_PRECISION, false);
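With the `#ifdef ENABLE_DEBUGGER` branch removed, `MS_CTX_ENABLE_MEM_REUSE` now always defaults to true, and memory reuse is presumably reduced at runtime through the MS_DEBUGGER_PARTIAL_MEM path warned about earlier rather than at compile time. A minimal sketch of such a runtime-settable context flag follows; `MsContextMock` and its parameter key are stand-ins, not the real MsContext API:

```cpp
// Minimal sketch of a runtime-configured flag replacing the removed #ifdef:
// memory reuse defaults to on, and a later (e.g. debugger-driven) call can lower it.
// MsContextMock and MsCtxParam are illustrative only.
#include <array>
#include <iostream>

enum MsCtxParam { MS_CTX_ENABLE_MEM_REUSE, MS_CTX_PARAM_COUNT };

class MsContextMock {
 public:
  MsContextMock() { set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, true); }  // always on by default now
  template <typename T>
  void set_param(MsCtxParam param, const T &value) { bool_params_[param] = value; }
  template <typename T>
  T get_param(MsCtxParam param) const { return bool_params_[param]; }

 private:
  std::array<bool, MS_CTX_PARAM_COUNT> bool_params_{};
};

int main() {
  MsContextMock context;
  std::cout << "mem reuse: " << context.get_param<bool>(MS_CTX_ENABLE_MEM_REUSE) << "\n";  // 1
  // e.g. the debugger requesting partial memory reuse at runtime (MS_DEBUGGER_PARTIAL_MEM):
  context.set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, false);
  std::cout << "mem reuse: " << context.get_param<bool>(MS_CTX_ENABLE_MEM_REUSE) << "\n";  // 0
}
```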