@@ -171,7 +171,7 @@ GraphId AscendSession::CompileGraph(NotNull<FuncGraphPtr> func_graph) {
  device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get()));
  // build kernel
  BuildKernel(root_graph);
  if (debugger_) {
  if (debugger_ && debugger_->partial_memory()) {
    debugger_->PreExecute(root_graph);
  }
  SetSummaryNodes(root_graph.get());
@@ -248,7 +248,7 @@ void AscendSession::BuildGraph(GraphId graph_id) {
  BuildKernel(graph);
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  if (debugger_) {
  if (debugger_ && debugger_->partial_memory()) {
    debugger_->PreExecute(graph);
  }
  if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
@@ -312,6 +312,9 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
  }
  // load input data from user input
  LoadInputData(kernel_graph, inputs);
  if (debugger_) {
    debugger_->PreExecute(kernel_graph);
  }
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  // Initialize parameter server
  InitPSParamAndOptim(kernel_graph, inputs);
@@ -278,9 +278,9 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
  auto &kernel_graph = graphs_[graph_id];
  PreIterationDbg(kernel_graph);
  // Load input data from user input
  LoadInputData(kernel_graph, inputs);
  PreIterationDbg(kernel_graph);
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  // Initialize parameter server
  InitPSParamAndOptim(kernel_graph, inputs);
@@ -22,7 +22,6 @@
#include <utility>
#include <memory>
#include <map>
#include "backend/session/session_context.h"
#include "backend/session/kernel_graph.h"
#include "backend/session/anf_runtime_algorithm.h"
@@ -30,6 +30,7 @@
#include "pipeline/jit/pipeline.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "runtime/device/kernel_runtime.h"
using debugger::EventReply;
using debugger::GraphProto;
@@ -47,6 +48,7 @@ namespace mindspore {
DebuggerPtr Debugger::debugger_ = nullptr;
std::mutex Debugger::instance_lock_;
static const size_t PRAMATER_OUTPUT_INDEX = 0;

Debugger::Debugger()
    : grpc_client_(nullptr),
@@ -62,7 +64,26 @@ Debugger::Debugger()
      is_dataset_graph_(false),
      partial_memory_(false),
      last_overflow_bin_(0),
      overflow_bin_path_("") {}
      overflow_bin_path_("") {
  if (CheckDebuggerEnabled()) {
    // configure partial memory reuse
    partial_memory_ = CheckDebuggerPartialMemoryEnabled();
    // switch memory reuse on or off
    auto context_ptr = MsContext::GetInstance();
    MS_EXCEPTION_IF_NULL(context_ptr);
    context_ptr->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, partial_memory_);
    // print some message about memory reuse to user
    if (partial_memory_) {
      MS_LOG(WARNING)
        << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first "
           "step. 2. Tensor values are only available for nodes that are watched by any watchpoint.";
    } else {
      MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
                      "usage for large models.";
    }
  }
}

void Debugger::Init(const uint32_t device_id, const std::string device_target) {
  // access lock for public method
@@ -133,27 +154,6 @@ void Debugger::EnableDebugger() {
    MS_LOG(INFO) << "Environment variable MS_DEBUGGER_PORT doesn't exist. Using default debugger port: 50051";
    port = "50051";
  }
  // configure partial memory reuse
  const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM");
  if (env_partial_mem_str != nullptr) {
    MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str;
    if (std::strcmp(env_partial_mem_str, "1") == 0) {
      partial_memory_ = true;
    }
  }
  // switch memory reuse on or off
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  context_ptr->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, partial_memory_);
  // print some message about memory reuse to user
  if (partial_memory_) {
    MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first "
                       "step. 2. Tensor values are only available for nodes that are watched by any watchpoint.";
  } else {
    MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
                    "usage for large models.";
  }
#ifdef ENABLE_D
  // set operation overflow info
  overflow_bin_path_ = DumpJsonParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
@@ -195,9 +195,7 @@ void Debugger::EnableDebugger() {
bool Debugger::CheckDebuggerDumpEnabled() {
  // see if dump is enabled
  if (device_target_ == kGPUDevice) {
    auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
    MS_EXCEPTION_IF_NULL(runtime_instance);
    return runtime_instance->DumpDataEnabled();
    return device::KernelRuntime::DumpDataEnabled();
  }
  return false;
}
@@ -213,6 +211,17 @@ bool Debugger::CheckDebuggerEnabled() {
  return false;
}

bool Debugger::CheckDebuggerPartialMemoryEnabled() {
  const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM");
  if (env_partial_mem_str != nullptr) {
    MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str;
    if (std::strcmp(env_partial_mem_str, "1") == 0) {
      return true;
    }
  }
  return false;
}

bool Debugger::DebuggerBackendEnabled() { return CheckDebuggerDumpEnabled() || CheckDebuggerEnabled(); }

void Debugger::Reset() {
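Reviewer note: the partial-memory check above only treats the exact string "1" as enabling the feature. A minimal standalone sketch of that getenv pattern is below; everything except the MS_DEBUGGER_PARTIAL_MEM variable name is illustrative and not part of this patch.

```cpp
// Standalone sketch (not part of the patch) of the environment-variable check
// performed by CheckDebuggerPartialMemoryEnabled: only the literal value "1"
// requests partial memory reuse. The main() wrapper is purely illustrative.
#include <cstdlib>
#include <cstring>
#include <iostream>

static bool PartialMemoryRequested() {
  const char *env = std::getenv("MS_DEBUGGER_PARTIAL_MEM");
  return env != nullptr && std::strcmp(env, "1") == 0;
}

int main() {
  std::cout << "partial memory reuse requested: " << std::boolalpha << PartialMemoryRequested() << "\n";
  return 0;
}
```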
@@ -324,6 +333,7 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
    // only try to enable debugger if it is not a dataset graph
    EnableDebugger();
    if (debugger_enabled_) {
      LoadParameters();
      // get graph proto and send to mindinsight
      SendGraphAndSuspend(GetGraphProto());
    }
@@ -839,4 +849,34 @@ bool Debugger::CheckPort(const char *port) {
  return true;
}

void Debugger::LoadParameters() {
  if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
  if (!(num_step_ == 0 || device_target_ == kAscendDevice ||
        (device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration())))
    return;
  MS_EXCEPTION_IF_NULL(graph_ptr_);
  const auto &parameters = graph_ptr_->inputs();
  // for parameters, set its execution order to be 0;
  int exec_order = 0;
  for (auto &item : parameters) {
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    string tensor_name = parameter_name + ':' + "0";
    ShapeVector int_shapes;
    auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
    (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                         [](size_t inner_item) { return SizeToInt(inner_item); });
    bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, true);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost:"
                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
    }
  }
}
} // namespace mindspore
@@ -103,6 +103,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  void SendMetadata();

  void LoadParameters();

 private:
  // private constructor for singleton
  Debugger();
@@ -118,6 +120,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  // check if debugger enabled
  bool CheckDebuggerEnabled();

  bool CheckDebuggerPartialMemoryEnabled();

  // check and save graph pointer
  void CheckGraphPtr(const KernelGraphPtr &graph_ptr);
@@ -663,39 +663,25 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file
}
#ifdef ENABLE_DEBUGGER
bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order,
bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order,
                                        const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
                                        size_t slot, Debugger *debugger, bool keep_prev) const {
                                        size_t slot, bool keep_prev) const {
  bool ret = false;
  DebugServices *debug_services = debugger->debug_services();
  MS_EXCEPTION_IF_NULL(debug_services);
  TensorLoader *tensor_loader = debug_services->tensor_loader();
  TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader();
  MS_EXCEPTION_IF_NULL(tensor_loader);
  // TensorData is freed up in AscendSession class
  auto tensor_data = std::make_shared<mindspore::TensorData>();
  tensor_data->SetName(tensor_name);
  tensor_data->SetExecutionOrder(execution_order);
  tensor_data->SetSlot(slot);
  if (trans_flag) {
    MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
    mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
    size_t host_size = out_tensor->data().nbytes();
    ret = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
    if (!ret) {
      MS_LOG(ERROR) << "Copy device mem to host failed";
      return ret;
    }
    tensor_data->SetTensor(out_tensor);
  } else {
    mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
    size_t host_size = out_tensor->data().nbytes();
    auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST);
    if (ret_rt_memcpy != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
    }
    MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
    tensor_data->SetTensor(out_tensor);
  mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
  size_t host_size = out_tensor->data().nbytes();
  auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST);
  if (ret_rt_memcpy != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
  }
  MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
  tensor_data->SetTensor(out_tensor);
  ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev);
  return ret;
}
@@ -45,9 +45,8 @@ class AscendDeviceAddress : public DeviceAddress {
  bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
                     const ShapeVector &host_shape, TypeId host_type) const override;
#ifdef ENABLE_DEBUGGER
  bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                     const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,
                     bool keep_prev) const;
  bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                     const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;
#endif

 private:
@@ -254,15 +254,10 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
      auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
      MS_EXCEPTION_IF_NULL(ascend_addr);
      ShapeVector int_shapes;
      if (trans_flag) {
        int_shapes = trans::GetRuntimePaddingShape(node, j);
      } else {
        auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
        (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                             [](size_t inner_item) { return SizeToInt(inner_item); });
      }
      auto ret =
        ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger, false);
      auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
      auto ret = ascend_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name
                      << ", host_format:" << format << ".!";
@@ -272,40 +267,6 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  }
}

void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  // trans_flag: "true" means tensor values will be transfered to host format, otherwise not.
  bool trans_flag = false;
  const auto &parameters = graph->inputs();
  // for parameters, set its execution order to be 0;
  int exec_order = 0;
  for (auto &item : parameters) {
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    string tensor_name = parameter_name + ':' + "0";
    auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
    MS_EXCEPTION_IF_NULL(ascend_addr);
    ShapeVector int_shapes;
    if (trans_flag) {
      int_shapes = trans::GetRuntimePaddingShape(item, PRAMATER_OUTPUT_INDEX);
    } else {
      auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
    }
    auto ret =
      ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger, true);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost Failed: flag:" << trans_flag << ", path:" << tensor_name
                    << ", host_format:" << format << ".!";
    }
  }
}
} // namespace
#endif
@@ -319,7 +280,7 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debug
  // load output
  LoadOutput(graph, debugger);
  // load parameters
  LoadParameters(graph, debugger);
  if (debugger) debugger->LoadParameters();
#endif
  return true;
}
@@ -70,6 +70,12 @@ class DeviceAddress : public mindspore::DeviceSync {
                             const ShapeVector &host_shape, TypeId host_type) const {
    return true;
  }
#ifdef ENABLE_DEBUGGER
  virtual bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                             const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const {
    return true;
  }
#endif

 protected:
  const void *ptr() const { return ptr_; }
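Reviewer note: making LoadMemToHost a virtual on DeviceAddress with a do-nothing default is what lets the new Debugger::LoadParameters call it through the base pointer returned by AnfAlgo::GetOutputAddr, instead of dynamic_cast-ing to the Ascend or GPU address type. A minimal sketch of that pattern follows; all class and function names in it are illustrative, not MindSpore's.

```cpp
// Sketch of the virtual-hook pattern introduced here: the base class exposes a
// harmless default so generic callers need no cast, and device-specific
// subclasses override it. Names are illustrative, not taken from MindSpore.
#include <iostream>
#include <memory>
#include <string>

class DeviceAddressSketch {
 public:
  virtual ~DeviceAddressSketch() = default;
  // Default: nothing to load; report success so callers can stay generic.
  virtual bool LoadMemToHost(const std::string &tensor_name) const { return true; }
};

class GpuAddressSketch : public DeviceAddressSketch {
 public:
  bool LoadMemToHost(const std::string &tensor_name) const override {
    std::cout << "loading " << tensor_name << " from device memory\n";
    return true;
  }
};

int main() {
  std::unique_ptr<DeviceAddressSketch> addr = std::make_unique<GpuAddressSketch>();
  // The caller works only through the base interface; no dynamic_cast needed.
  return addr->LoadMemToHost("fc1.weight:0") ? 0 : 1;
}
```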
@@ -80,14 +80,14 @@ GPUDeviceAddress::~GPUDeviceAddress() {
}
#ifdef ENABLE_DEBUGGER
bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                                     const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,
                                     const ShapeVector &host_shape, TypeId host_type, size_t slot,
                                     bool keep_prev) const {
  bool ret = false;
  if (size_ == 0) {
    return true;
  }
  DebugServices *debug_services = debugger->debug_services();
  TensorLoader *tensor_loader = debug_services->tensor_loader();
  TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader();

  mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
  size_t host_size = out_tensor->data().nbytes();
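Reviewer note: both device-address implementations now reach the debugger through Debugger::GetInstance() rather than a Debugger * parameter, matching the lock-guarded singleton declared by debugger_ and instance_lock_ earlier in the diff. A minimal sketch of that lazy, lock-guarded singleton access pattern (illustrative names only, not MindSpore's actual implementation) is below.

```cpp
// Sketch of a lock-guarded lazy singleton, similar in spirit to the
// Debugger::GetInstance() accessor the patch switches to. Illustrative only.
#include <iostream>
#include <memory>
#include <mutex>

class DebuggerSketch {
 public:
  static std::shared_ptr<DebuggerSketch> GetInstance() {
    std::lock_guard<std::mutex> lock(instance_lock_);
    if (instance_ == nullptr) {
      instance_ = std::shared_ptr<DebuggerSketch>(new DebuggerSketch());
    }
    return instance_;
  }
  void Touch() const { std::cout << "debugger singleton alive\n"; }

 private:
  DebuggerSketch() = default;  // private constructor: only GetInstance creates it
  static std::shared_ptr<DebuggerSketch> instance_;
  static std::mutex instance_lock_;
};

std::shared_ptr<DebuggerSketch> DebuggerSketch::instance_;
std::mutex DebuggerSketch::instance_lock_;

int main() {
  DebuggerSketch::GetInstance()->Touch();
  return 0;
}
```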
@@ -44,8 +44,7 @@ class GPUDeviceAddress : public DeviceAddress {
#ifdef ENABLE_DEBUGGER
  bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                     const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,
                     bool keep_prev) const;
                     const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;
#endif

 private:
  DeviceAddressStatus status_{DeviceAddressStatus::kInDevice};
@@ -111,7 +111,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
      auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
      auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, true);
      auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost:"
                      << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
@@ -130,7 +130,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
      auto shape = AnfAlgo::GetOutputDeviceShape(kernel, j);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
      auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, debugger, false);
      auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost:"
                      << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -148,36 +148,6 @@ void UpdateStepNum(Debugger *debugger, bool dump_enabled) {
  }
}

void LoadParameters(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) {
  MS_EXCEPTION_IF_NULL(graph);
  if (!(debugger && dump_enabled)) {
    return;
  }
  const auto &parameters = graph->inputs();
  // for parameters, set its execution order to be 0;
  int exec_order = 0;
  for (auto &item : parameters) {
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    auto addr = AnfAlgo::GetOutputAddr(item, PARAMETER_OUTPUT_INDEX);
    auto type = AnfAlgo::GetOutputInferDataType(item, PARAMETER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    string tensor_name = parameter_name + ':' + "0";
    auto gpu_addr = dynamic_cast<const mindspore::device::gpu::GPUDeviceAddress *>(addr);
    ShapeVector int_shapes;
    auto shape = AnfAlgo::GetOutputDeviceShape(item, PARAMETER_OUTPUT_INDEX);
    (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                         [](size_t inner_item) { return SizeToInt(inner_item); });
    auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, debugger, true);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost:"
                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
    }
  }
}

void ClearCurrentData(Debugger *debugger, bool dump_enabled) {
  if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
    DebugServices *debug_services = debugger->debug_services();
@@ -601,7 +571,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
  }
  if (!mock) {
    // collect weights and bias for dump mode
    LoadParameters(graph, debugger, dump_enabled);
    if (debugger) debugger->LoadParameters();
    CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed.");
  }
  ClearSwapInfo(mock);
@@ -53,8 +53,8 @@ class KernelRuntime {
  void RunOpAssignMemory(const ValuePtr &pre_output_value, const std::vector<tensor::TensorPtr> &input_tensors,
                         session::KernelGraph *graph);
  void RunOpClearMemory(const session::KernelGraph *graph);
  bool DumpDataEnabled();
  bool DumpDataEnabledIteration();
  static bool DumpDataEnabled();
  static bool DumpDataEnabledIteration();
  virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger);
  virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
  virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0;