Merge pull request !6499 from john_tzanakakis/master_ms1_grpctags/v1.1.0
| @@ -174,11 +174,9 @@ GraphId AscendSession::CompileGraph(NotNull<FuncGraphPtr> func_graph) { | |||||
| device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); | device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); | ||||
| // build kernel | // build kernel | ||||
| BuildKernel(root_graph); | BuildKernel(root_graph); | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| if (debugger_) { | if (debugger_) { | ||||
| debugger_->PreExecute(root_graph); | debugger_->PreExecute(root_graph); | ||||
| } | } | ||||
| #endif | |||||
| // alloc mem | // alloc mem | ||||
| MemoryAlloc(root_graph.get()); | MemoryAlloc(root_graph.get()); | ||||
| // generate and load task into device | // generate and load task into device | ||||
| @@ -249,11 +247,9 @@ void AscendSession::BuildGraph(GraphId graph_id) { | |||||
| BuildKernel(graph); | BuildKernel(graph); | ||||
| auto ms_context = MsContext::GetInstance(); | auto ms_context = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(ms_context); | MS_EXCEPTION_IF_NULL(ms_context); | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| if (debugger_) { | if (debugger_) { | ||||
| debugger_->PreExecute(graph); | debugger_->PreExecute(graph); | ||||
| } | } | ||||
| #endif | |||||
| if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) { | if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) { | ||||
| MS_LOG(INFO) << "Precompile only, stop in build kernel step"; | MS_LOG(INFO) << "Precompile only, stop in build kernel step"; | ||||
| } else { | } else { | ||||
| @@ -325,18 +321,14 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor:: | |||||
| } | } | ||||
| // summary | // summary | ||||
| Summary(kernel_graph.get()); | Summary(kernel_graph.get()); | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| // load tensor from device for debugger | // load tensor from device for debugger | ||||
| if (debugger_ && debugger_->debugger_enabled()) { | if (debugger_ && debugger_->debugger_enabled()) { | ||||
| LoadTensor(kernel_graph); | LoadTensor(kernel_graph); | ||||
| } | } | ||||
| #endif | |||||
| #ifdef ENABLE_DEBUGGER | |||||
| // debugger post-execution processing | // debugger post-execution processing | ||||
| if (debugger_) { | if (debugger_) { | ||||
| debugger_->PostExecute(); | debugger_->PostExecute(); | ||||
| } | } | ||||
| #endif | |||||
| MS_LOG(INFO) << "Finish!"; | MS_LOG(INFO) << "Finish!"; | ||||
| } | } | ||||
| @@ -282,9 +282,7 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList | |||||
| void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) { | void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) { | ||||
| auto &kernel_graph = graphs_[graph_id]; | auto &kernel_graph = graphs_[graph_id]; | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| PreIterationDbg(kernel_graph); | PreIterationDbg(kernel_graph); | ||||
| #endif | |||||
| // Load input data from user input | // Load input data from user input | ||||
| LoadInputData(kernel_graph, inputs); | LoadInputData(kernel_graph, inputs); | ||||
| #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) | #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) | ||||
| @@ -293,18 +291,14 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten | |||||
| #endif | #endif | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| Execute(kernel_graph); | Execute(kernel_graph); | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| PostLoadTensor(kernel_graph); | PostLoadTensor(kernel_graph); | ||||
| #endif | |||||
| // Summary | // Summary | ||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| if (context_ptr->get_param<bool>(MS_CTX_ENABLE_GPU_SUMMARY)) { | if (context_ptr->get_param<bool>(MS_CTX_ENABLE_GPU_SUMMARY)) { | ||||
| Summary(kernel_graph.get()); | Summary(kernel_graph.get()); | ||||
| } | } | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| PostIterationDbg(kernel_graph); | PostIterationDbg(kernel_graph); | ||||
| #endif | |||||
| } | } | ||||
| void GPUSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, | void GPUSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, | ||||
| @@ -339,7 +333,6 @@ void GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info | |||||
| RunOpClearMemory(kernel_graph.get()); | RunOpClearMemory(kernel_graph.get()); | ||||
| } | } | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { | void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { | ||||
| if (debugger_->DebuggerBackendEnabled()) { | if (debugger_->DebuggerBackendEnabled()) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| @@ -409,7 +402,6 @@ void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph | |||||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | TensorLoader *tensor_loader = debug_services->tensor_loader(); | ||||
| tensor_loader->EmptyPrevTensor(); | tensor_loader->EmptyPrevTensor(); | ||||
| } | } | ||||
| #endif | |||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace session | } // namespace session | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -69,7 +69,6 @@ class GPUSession : public SessionBasic { | |||||
| void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| bool DumpDataEnabledIteration() const; | bool DumpDataEnabledIteration() const; | ||||
| @@ -81,7 +80,6 @@ class GPUSession : public SessionBasic { | |||||
| void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| #endif | |||||
| }; | }; | ||||
| using GPUSessionPtr = std::shared_ptr<GPUSession>; | using GPUSessionPtr = std::shared_ptr<GPUSession>; | ||||
| MS_REG_SESSION(kGPUDevice, GPUSession); | MS_REG_SESSION(kGPUDevice, GPUSession); | ||||
| @@ -32,7 +32,7 @@ | |||||
| #include "utils/contract.h" | #include "utils/contract.h" | ||||
| #include "runtime/device/kernel_info.h" | #include "runtime/device/kernel_info.h" | ||||
| #include "utils/ms_context.h" | #include "utils/ms_context.h" | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| #if !defined(_WIN32) && !defined(_WIN64) | |||||
| #include "debug/debugger/debugger.h" | #include "debug/debugger/debugger.h" | ||||
| #endif | #endif | ||||
| @@ -57,7 +57,7 @@ class Executor; | |||||
| class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | ||||
| public: | public: | ||||
| SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) { | SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) { | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| #if !defined(_WIN32) && !defined(_WIN64) | |||||
| debugger_ = nullptr; | debugger_ = nullptr; | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -179,7 +179,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | |||||
| static GraphId graph_sum_; | static GraphId graph_sum_; | ||||
| uint32_t device_id_; | uint32_t device_id_; | ||||
| std::shared_ptr<Executor> executor_; | std::shared_ptr<Executor> executor_; | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| #if !defined(_WIN32) && !defined(_WIN64) | |||||
| std::shared_ptr<Debugger> debugger_; | std::shared_ptr<Debugger> debugger_; | ||||
| #endif | #endif | ||||
| }; | }; | ||||
| @@ -75,7 +75,6 @@ bool GPUKernelRuntime::Init() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| namespace { | namespace { | ||||
| void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | ||||
| const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs, | const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs, | ||||
| @@ -187,7 +186,6 @@ void ClearCurrentData(Debugger *debugger, bool dump_enabled) { | |||||
| } | } | ||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| #endif | |||||
| DeviceAddressPtr GPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | DeviceAddressPtr GPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | ||||
| TypeId type_id) { | TypeId type_id) { | ||||
| @@ -546,13 +544,11 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||||
| // The inputs and outputs memory of communication kernel need be continuous, so separate processing. | // The inputs and outputs memory of communication kernel need be continuous, so separate processing. | ||||
| AllocCommunicationOpDynamicRes(graph); | AllocCommunicationOpDynamicRes(graph); | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| debugger_ = debugger; | debugger_ = debugger; | ||||
| bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration(); | bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration(); | ||||
| if (!mock) { | if (!mock) { | ||||
| UpdateStepNum(debugger, dump_enabled); | UpdateStepNum(debugger, dump_enabled); | ||||
| } | } | ||||
| #endif | |||||
| auto &kernels = graph->execution_order(); | auto &kernels = graph->execution_order(); | ||||
| int exec_order = 1; | int exec_order = 1; | ||||
| @@ -567,12 +563,10 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||||
| AddressPtrList kernel_outputs; | AddressPtrList kernel_outputs; | ||||
| auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock); | auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock); | ||||
| if (!ret) { | if (!ret) { | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| if (!mock) { | if (!mock) { | ||||
| // invalidate current data collected by the debugger | // invalidate current data collected by the debugger | ||||
| ClearCurrentData(debugger, dump_enabled); | ClearCurrentData(debugger, dump_enabled); | ||||
| } | } | ||||
| #endif | |||||
| return false; | return false; | ||||
| } | } | ||||
| if (!mock) { | if (!mock) { | ||||
| @@ -591,29 +585,23 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||||
| } else { | } else { | ||||
| LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs); | LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs); | ||||
| } | } | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost) | // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost) | ||||
| LoadKernelData(debugger, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_, | LoadKernelData(debugger, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_, | ||||
| dump_enabled); | dump_enabled); | ||||
| #endif | |||||
| } | } | ||||
| exec_order = exec_order + 1; | exec_order = exec_order + 1; | ||||
| FreeKernelDynamicRes(kernel); | FreeKernelDynamicRes(kernel); | ||||
| if (!UpdateMemorySwapTask(kernel, mock, profiling)) { | if (!UpdateMemorySwapTask(kernel, mock, profiling)) { | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| if (!mock) { | if (!mock) { | ||||
| // invalidate current data collected by the debugger | // invalidate current data collected by the debugger | ||||
| ClearCurrentData(debugger, dump_enabled); | ClearCurrentData(debugger, dump_enabled); | ||||
| } | } | ||||
| #endif | |||||
| return false; | return false; | ||||
| } | } | ||||
| } | } | ||||
| if (!mock) { | if (!mock) { | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| // collect weights and bias for dump mode | // collect weights and bias for dump mode | ||||
| LoadParameters(graph, debugger, dump_enabled); | LoadParameters(graph, debugger, dump_enabled); | ||||
| #endif | |||||
| CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); | CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); | ||||
| } | } | ||||
| ClearSwapInfo(mock); | ClearSwapInfo(mock); | ||||