Merge pull request !6499 from john_tzanakakis/master_ms1_grpctags/v1.1.0
| @@ -174,11 +174,9 @@ GraphId AscendSession::CompileGraph(NotNull<FuncGraphPtr> func_graph) { | |||
| device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); | |||
| // build kernel | |||
| BuildKernel(root_graph); | |||
| #ifdef ENABLE_DEBUGGER | |||
| if (debugger_) { | |||
| debugger_->PreExecute(root_graph); | |||
| } | |||
| #endif | |||
| // alloc mem | |||
| MemoryAlloc(root_graph.get()); | |||
| // generate and load task into device | |||
| @@ -249,11 +247,9 @@ void AscendSession::BuildGraph(GraphId graph_id) { | |||
| BuildKernel(graph); | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| #ifdef ENABLE_DEBUGGER | |||
| if (debugger_) { | |||
| debugger_->PreExecute(graph); | |||
| } | |||
| #endif | |||
| if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) { | |||
| MS_LOG(INFO) << "Precompile only, stop in build kernel step"; | |||
| } else { | |||
| @@ -325,18 +321,14 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor:: | |||
| } | |||
| // summary | |||
| Summary(kernel_graph.get()); | |||
| #ifdef ENABLE_DEBUGGER | |||
| // load tensor from device for debugger | |||
| if (debugger_ && debugger_->debugger_enabled()) { | |||
| LoadTensor(kernel_graph); | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DEBUGGER | |||
| // debugger post-execution processing | |||
| if (debugger_) { | |||
| debugger_->PostExecute(); | |||
| } | |||
| #endif | |||
| MS_LOG(INFO) << "Finish!"; | |||
| } | |||
| @@ -282,9 +282,7 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList | |||
| void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) { | |||
| auto &kernel_graph = graphs_[graph_id]; | |||
| #ifdef ENABLE_DEBUGGER | |||
| PreIterationDbg(kernel_graph); | |||
| #endif | |||
| // Load input data from user input | |||
| LoadInputData(kernel_graph, inputs); | |||
| #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) | |||
| @@ -293,18 +291,14 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten | |||
| #endif | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| Execute(kernel_graph); | |||
| #ifdef ENABLE_DEBUGGER | |||
| PostLoadTensor(kernel_graph); | |||
| #endif | |||
| // Summary | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| if (context_ptr->get_param<bool>(MS_CTX_ENABLE_GPU_SUMMARY)) { | |||
| Summary(kernel_graph.get()); | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| PostIterationDbg(kernel_graph); | |||
| #endif | |||
| } | |||
| void GPUSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, | |||
| @@ -339,7 +333,6 @@ void GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info | |||
| RunOpClearMemory(kernel_graph.get()); | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| if (debugger_->DebuggerBackendEnabled()) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| @@ -409,7 +402,6 @@ void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| tensor_loader->EmptyPrevTensor(); | |||
| } | |||
| #endif | |||
| } // namespace gpu | |||
| } // namespace session | |||
| } // namespace mindspore | |||
| @@ -69,7 +69,6 @@ class GPUSession : public SessionBasic { | |||
| void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| #ifdef ENABLE_DEBUGGER | |||
| void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| bool DumpDataEnabledIteration() const; | |||
| @@ -81,7 +80,6 @@ class GPUSession : public SessionBasic { | |||
| void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| #endif | |||
| }; | |||
| using GPUSessionPtr = std::shared_ptr<GPUSession>; | |||
| MS_REG_SESSION(kGPUDevice, GPUSession); | |||
| @@ -32,7 +32,7 @@ | |||
| #include "utils/contract.h" | |||
| #include "runtime/device/kernel_info.h" | |||
| #include "utils/ms_context.h" | |||
| #ifdef ENABLE_DEBUGGER | |||
| #if !defined(_WIN32) && !defined(_WIN64) | |||
| #include "debug/debugger/debugger.h" | |||
| #endif | |||
| @@ -57,7 +57,7 @@ class Executor; | |||
| class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | |||
| public: | |||
| SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| #if !defined(_WIN32) && !defined(_WIN64) | |||
| debugger_ = nullptr; | |||
| #endif | |||
| } | |||
| @@ -179,7 +179,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | |||
| static GraphId graph_sum_; | |||
| uint32_t device_id_; | |||
| std::shared_ptr<Executor> executor_; | |||
| #ifdef ENABLE_DEBUGGER | |||
| #if !defined(_WIN32) && !defined(_WIN64) | |||
| std::shared_ptr<Debugger> debugger_; | |||
| #endif | |||
| }; | |||
| @@ -75,7 +75,6 @@ bool GPUKernelRuntime::Init() { | |||
| return ret; | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| namespace { | |||
| void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | |||
| const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs, | |||
| @@ -187,7 +186,6 @@ void ClearCurrentData(Debugger *debugger, bool dump_enabled) { | |||
| } | |||
| } | |||
| } // namespace | |||
| #endif | |||
| DeviceAddressPtr GPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | |||
| TypeId type_id) { | |||
| @@ -546,13 +544,11 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||
| // The input and output memory of a communication kernel must be contiguous, so it is processed separately. | |||
| AllocCommunicationOpDynamicRes(graph); | |||
| #ifdef ENABLE_DEBUGGER | |||
| debugger_ = debugger; | |||
| bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration(); | |||
| if (!mock) { | |||
| UpdateStepNum(debugger, dump_enabled); | |||
| } | |||
| #endif | |||
| auto &kernels = graph->execution_order(); | |||
| int exec_order = 1; | |||
| @@ -567,12 +563,10 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||
| AddressPtrList kernel_outputs; | |||
| auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock); | |||
| if (!ret) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| if (!mock) { | |||
| // invalidate current data collected by the debugger | |||
| ClearCurrentData(debugger, dump_enabled); | |||
| } | |||
| #endif | |||
| return false; | |||
| } | |||
| if (!mock) { | |||
| @@ -591,29 +585,23 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||
| } else { | |||
| LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs); | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost) | |||
| LoadKernelData(debugger, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_, | |||
| dump_enabled); | |||
| #endif | |||
| } | |||
| exec_order = exec_order + 1; | |||
| FreeKernelDynamicRes(kernel); | |||
| if (!UpdateMemorySwapTask(kernel, mock, profiling)) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| if (!mock) { | |||
| // invalidate current data collected by the debugger | |||
| ClearCurrentData(debugger, dump_enabled); | |||
| } | |||
| #endif | |||
| return false; | |||
| } | |||
| } | |||
| if (!mock) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| // collect weights and bias for dump mode | |||
| LoadParameters(graph, debugger, dump_enabled); | |||
| #endif | |||
| CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); | |||
| } | |||
| ClearSwapInfo(mock); | |||