| @@ -335,7 +335,41 @@ void DebugServices::SetTensorToNotInUse(const std::shared_ptr<TensorData> &tenso | |||
| } | |||
| #endif | |||
| #ifdef ONLINE_DBG_MODE | |||
| bool DebugServices::CompareCurrentRootGraph(uint32_t id) { | |||
| auto debugger = Debugger::GetInstance(); | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| std::string device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET); | |||
| auto cur_root_graph_id = debugger->GetCurrentRootGraphId(); | |||
| if ((device_target == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) || | |||
| device_target == kAscendDevice) { | |||
| if (cur_root_graph_id != id) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| const void *DebugServices::PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name) { | |||
| std::shared_ptr<TensorData> prev_tensor_data; | |||
| if (!CompareCurrentRootGraph(Debugger::GetInstance()->GetPrevRootGraphId())) { | |||
| // not supporting watchpoints that need prev tensor for multi root graph networks. | |||
| MS_LOG(DEBUG) << "Previous root graph is different from current root graph, setting prev_tensor to nullptr."; | |||
| prev_tensor_data = nullptr; | |||
| } else { | |||
| prev_tensor_data = tensor_loader_->GetPrevTensor(tensor_name); | |||
| } | |||
| if (prev_tensor_data) { | |||
| *prev_num_elements = prev_tensor_data->GetNumElements(); | |||
| return prev_tensor_data->GetDataPtr(); | |||
| } | |||
| return nullptr; | |||
| } | |||
| #endif | |||
| void DebugServices::CheckHistoryErrorCode(int *error_code, bool history_not_found) { | |||
| // check history error_code only for offline debugger | |||
| if (history_not_found) { | |||
| *error_code = ITensorSummary::HISTORY_NOT_FOUND; // error code for history not found | |||
| } | |||
| @@ -401,13 +435,14 @@ void DebugServices::CheckWatchpointsForTensor( | |||
| bool history_not_found = 0; | |||
| previous_tensor_ptr = GetPrevTensor(tensor, previous_iter_tensor_needed, &prev_num_elements, &history_not_found); | |||
| #else | |||
| std::shared_ptr<TensorData> prev_tensor_data = tensor_loader_->GetPrevTensor(tensor_name); | |||
| if (prev_tensor_data) { | |||
| previous_tensor_ptr = prev_tensor_data->GetDataPtr(); | |||
| prev_num_elements = prev_tensor_data->GetNumElements(); | |||
| if (!CompareCurrentRootGraph(tensor->GetRootGraphId())) { | |||
| MS_LOG(DEBUG) | |||
| << "Current root_graph_id is different from tensor's root_graph_id, skipping checkwatchpoints for tensor: " | |||
| << tensor->GetName(); | |||
| continue; | |||
| } | |||
| previous_tensor_ptr = PreparePrevTensor(&prev_num_elements, tensor_name); | |||
| #endif | |||
| std::unique_ptr<ITensorSummary> base_summary_ptr; | |||
| if (!(watchpoints_to_check.size() == 1 && watchpoints_to_check[0].condition.type == IS_OVERFLOW)) { | |||
| base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, num_elements, prev_num_elements, tensor_dtype); | |||
| @@ -440,7 +475,6 @@ void DebugServices::CheckWatchpointsForTensor( | |||
| tensor->GetDeviceId(), tensor->GetRootGraphId(), parameter_list, error_code); | |||
| } | |||
| } | |||
| #ifdef OFFLINE_DBG_MODE | |||
| SetTensorToNotInUse(tensor, previous_tensor_ptr); | |||
| // in offline mode remove the need for the data | |||
| @@ -448,6 +482,7 @@ void DebugServices::CheckWatchpointsForTensor( | |||
| #endif | |||
| } | |||
| } | |||
| void DebugServices::CheckWatchpoints( | |||
| std::vector<std::string> *const name, std::vector<std::string> *const slot, std::vector<int> *const condition, | |||
| std::vector<unsigned int> *const watchpoint_id, std::vector<std::vector<parameter_t>> *const parameters, | |||
| @@ -1362,6 +1397,14 @@ void DebugServices::ReadNodesTensors(const std::vector<std::string> &name, std:: | |||
| if (std::get<1>(result) == nullptr) { | |||
| continue; | |||
| } | |||
| #ifdef ONLINE_DBG_MODE | |||
| if (!CompareCurrentRootGraph(std::get<1>(result)->GetRootGraphId())) { | |||
| MS_LOG(INFO) << "tensor root_graph_id: " << std::get<1>(result)->GetRootGraphId() | |||
| << " is different from cur_root_graph_id: " << Debugger::GetInstance()->GetCurrentRootGraphId() | |||
| << "."; | |||
| MS_LOG(INFO) << "Not reading tensor: " << std::get<0>(result) << "."; | |||
| } | |||
| #endif | |||
| (void)ret_name->emplace_back(std::get<0>(result)); | |||
| (void)data_ptr->emplace_back(reinterpret_cast<const char *>(std::get<1>(result)->GetDataPtr())); | |||
| (void)data_size->emplace_back(std::get<1>(result)->GetByteSize()); | |||
| @@ -260,6 +260,8 @@ class DebugServices { | |||
| const std::vector<parameter_t> ¶meter_list); | |||
| #endif | |||
| const void *PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name); | |||
| void CheckHistoryErrorCode(int *error_code, bool history_not_found); | |||
| void CheckWatchpointsForTensor(partitioned_names *chunk_names, partitioned_names *chunk_slots, | |||
| @@ -411,6 +413,8 @@ class DebugServices { | |||
| bool IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const; | |||
| bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const; | |||
| bool CompareCurrentRootGraph(uint32_t id); | |||
| #endif | |||
| std::vector<std::shared_ptr<TensorData>> GetTensor() const; | |||
| @@ -77,6 +77,8 @@ Debugger::Debugger() | |||
| node_name_(""), | |||
| cur_name_(""), | |||
| training_done_(false), | |||
| send_metadata_done_(false), | |||
| received_new_graph_(false), | |||
| is_dataset_graph_(false), | |||
| partial_memory_(false), | |||
| initial_suspend_(true), | |||
| @@ -284,20 +286,35 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs | |||
| } | |||
| // Store graphs that are run in one step. | |||
| graph_ptr_step_vec_ = graphs; | |||
| prev_root_graph_id_ = cur_root_graph_id_; | |||
| // set first run graph as the root graph | |||
| cur_root_graph_id_ = graph_ptr_step_vec_[0]->graph_id(); | |||
| MS_LOG(DEBUG) << "Current root graph id: " << cur_root_graph_id_ << " prev_root_graph_id_: " << prev_root_graph_id_ | |||
| << " for step: " << num_step_ << "."; | |||
| MS_LOG(DEBUG) << "Set root graph for all the subgraphs:"; | |||
| for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) { | |||
| const auto &graph = graphs[graph_index]; | |||
| // set root graph id for GPU mindrt runtime. | |||
| MS_LOG(DEBUG) << "Set root graph for graph: " << graph->graph_id() << " to: " << cur_root_graph_id_ << "."; | |||
| graph->set_root_graph_id(cur_root_graph_id_); | |||
| if (debugger_) { | |||
| debugger_->PreExecute(graph); | |||
| } | |||
| } | |||
| } | |||
| void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| // access lock for public method | |||
| std::lock_guard<std::mutex> a_lock(access_lock_); | |||
| CheckDatasetSinkMode(); | |||
| auto graph_id = graph_ptr->graph_id(); | |||
| void Debugger::SetCurrentAndPrevRootGraph(uint32_t root_graph_id) { | |||
| // for GPU root graphs are set in PreExecuteGraphDebugger. | |||
| if (device_target_ != kAscendDevice) { | |||
| return; | |||
| } | |||
| prev_root_graph_id_ = cur_root_graph_id_; | |||
| cur_root_graph_id_ = root_graph_id; | |||
| MS_LOG(DEBUG) << "Current root graph id: " << cur_root_graph_id_ << " prev_root_graph_id_: " << prev_root_graph_id_ | |||
| << " for step: " << num_step_ << "."; | |||
| } | |||
| void Debugger::StoreRunGraphIdList(uint32_t graph_id) { | |||
| // collect rungraph_ids to update step number in multigraph case | |||
| if (!rungraph_id_list_.size()) { | |||
| rungraph_id_list_.push_back(graph_id); | |||
| @@ -307,6 +324,17 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) { | |||
| rungraph_id_list_.push_back(graph_id); | |||
| } | |||
| } | |||
| } | |||
| void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| // access lock for public method | |||
| std::lock_guard<std::mutex> a_lock(access_lock_); | |||
| CheckDatasetSinkMode(); | |||
| auto graph_id = graph_ptr->graph_id(); | |||
| MS_LOG(DEBUG) << "PreExecute for graph: " << graph_id << " in step: " << num_step_ << "."; | |||
| StoreRunGraphIdList(graph_id); | |||
| SetCurrentAndPrevRootGraph(graph_ptr->root_graph_id()); | |||
| // multiple graphs | |||
| if (graph_proto_list_.size() > 1) { | |||
| // there are more than one graphs are not dataset_graph | |||
| @@ -315,20 +343,22 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) { | |||
| } | |||
| } else if (graph_proto_list_.size() == 1) { | |||
| // single graph, and not the initial step | |||
| if (device_target_ == kGPUDevice && num_step_ != 0) { | |||
| if (device_target_ == kGPUDevice && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) && | |||
| num_step_ != 0) { | |||
| if (debugger_enabled_ && !(run_level_ == "node" && suspended_at_last_kernel_)) { | |||
| CommandLoop(); | |||
| } | |||
| debug_services_->ResetLoadedTensors(); | |||
| } | |||
| // In single graph case, reset graph_ptr_ to be nullptr for the initial step | |||
| if (num_step_ == 0) { | |||
| // In single graph case, reset graph_ptr_ to be nullptr when debugger receives a new graph | |||
| if (received_new_graph_) { | |||
| graph_ptr_ = nullptr; | |||
| CheckGraphPtr(graph_ptr); | |||
| } | |||
| } else if (debugger_enabled_ && graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) { | |||
| } else if (debugger_enabled_ && graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice && | |||
| !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) { | |||
| // Multiple graph, and not the initial step, | |||
| // stop only when receive the first sub run graph for each step | |||
| // stop only when receiving the first sub run graph for each step for old runtime | |||
| // if we have stopped for the last kernel before, no need to stop again | |||
| if (pipeline::GraphExecutorPy::GetDebugTerminate()) { | |||
| return; | |||
| @@ -359,6 +389,7 @@ void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) { | |||
| SendMultiGraphsAndSuspend(graph_proto_list_); | |||
| graph_proto_list_.clear(); | |||
| received_new_graph_ = false; | |||
| } | |||
| } | |||
| @@ -474,14 +505,19 @@ void Debugger::PostExecute() { | |||
| } | |||
| SendWatchpoints(CheckWatchpoints()); | |||
| // no need to suspend at each graph for GPU, suspension happens in preExecute | |||
| if (device_target_ != kGPUDevice) { | |||
| // no need to suspend at each graph for GPU old runtime, suspension happens in preExecute | |||
| if (device_target_ == kAscendDevice) { | |||
| CommandLoop(); | |||
| } else if (device_target_ == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) { | |||
| if (!(run_level_ == "node" && suspended_at_last_kernel_)) { | |||
| CommandLoop(); | |||
| } | |||
| } | |||
| } | |||
| // Only keep parameters in the current map | |||
| // GPU ResetLoadedTensors happens in preExecute | |||
| if (device_target_ != kGPUDevice) { | |||
| // Only keep parameters in the current map | |||
| // GPU ResetLoadedTensors for old runtime happens in preExecute | |||
| if ((device_target_ == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) || | |||
| device_target_ == kAscendDevice) { | |||
| debug_services_->ResetLoadedTensors(); | |||
| } | |||
| } | |||
| @@ -534,6 +570,7 @@ void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| if (graph_ptr_ != graph_ptr) { | |||
| MS_LOG(INFO) << "LoadGraphs Debugger got new graph: " << graph_ptr->graph_id(); | |||
| received_new_graph_ = true; | |||
| // save new graph_ptr | |||
| graph_ptr_ = graph_ptr; | |||
| CheckDatasetGraph(); | |||
| @@ -559,12 +596,16 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) { | |||
| graph_ptr_ = graph_ptr; | |||
| if (!is_dataset_graph_) { | |||
| // only try to enable debugger if it is not a dataset graph | |||
| EnableDebugger(); | |||
| if (!debugger_enabled_) { | |||
| EnableDebugger(); | |||
| } | |||
| if (debugger_enabled_) { | |||
| LoadParametersAndConst(); | |||
| // get graph proto and send to Mindinsight | |||
| auto graph_proto = graph_proto_list_.front(); | |||
| SendGraphAndSuspend(graph_proto); | |||
| graph_proto_list_.clear(); | |||
| received_new_graph_ = false; | |||
| } | |||
| } | |||
| } | |||
| @@ -636,16 +677,17 @@ void Debugger::SendHeartbeat(int32_t period) { | |||
| } | |||
| void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) { | |||
| if (SendMetadata(true)) { | |||
| // send graph to Mindinsight server | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| EventReply reply = grpc_client_->SendGraph(graph_proto); | |||
| if (reply.status() != reply.OK) { | |||
| MS_LOG(ERROR) << "Error: SendGraph failed"; | |||
| } | |||
| // enter command loop, wait and process commands | |||
| CommandLoop(); | |||
| if (!CheckSendMetadata()) { | |||
| return; | |||
| } | |||
| // send graph to Mindinsight server | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| EventReply reply = grpc_client_->SendGraph(graph_proto); | |||
| if (reply.status() != reply.OK) { | |||
| MS_LOG(ERROR) << "Error: SendGraph failed"; | |||
| } | |||
| // enter command loop, wait and process commands | |||
| CommandLoop(); | |||
| } | |||
| bool Debugger::SendMetadata(bool version_check) { | |||
| @@ -695,7 +737,7 @@ bool Debugger::SendMetadata(bool version_check) { | |||
| } | |||
| void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_proto_list) { | |||
| if (!SendMetadata(true)) { | |||
| if (!CheckSendMetadata()) { | |||
| return; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| @@ -732,10 +774,20 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot | |||
| CommandLoop(); | |||
| } | |||
| bool Debugger::CheckSendMetadata() { | |||
| if (!send_metadata_done_) { | |||
| if (!SendMetadata(true)) { | |||
| return false; | |||
| } | |||
| send_metadata_done_ = true; | |||
| } | |||
| return true; | |||
| } | |||
| void Debugger::CommandLoop() { | |||
| // prepare metadata | |||
| MS_EXCEPTION_IF_NULL(graph_ptr_); | |||
| std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id()); | |||
| std::string device_name = std::to_string(device_id_) + ":" + std::to_string(cur_root_graph_id_); | |||
| Metadata metadata; | |||
| metadata.set_device_name(device_name); | |||
| @@ -1051,8 +1103,8 @@ std::list<TensorBase> Debugger::LoadTensorsBase(const ProtoVector<TensorProto> & | |||
| debug_services_->SearchNodesTensors(name, &result_list); | |||
| for (auto result : result_list) { | |||
| auto tensor = std::get<1>(result); | |||
| if (!tensor) { | |||
| // tensor was not found, creating empty tensor base. | |||
| if (!tensor || cur_root_graph_id_ != tensor->GetRootGraphId()) { | |||
| // tensor was not found or tensor's graph was not executed in the current step, creating empty tensor base. | |||
| TensorBase tensor_base_item; | |||
| tensor_base_item.set_data_size(0); | |||
| tensor_base_item.set_data_type(0); | |||
| @@ -1080,8 +1132,8 @@ std::list<TensorSummary> Debugger::LoadTensorsStat(const ProtoVector<TensorProto | |||
| debug_services_->SearchNodesTensors(name, &result_list); | |||
| for (auto result : result_list) { | |||
| auto tensor = std::get<1>(result); | |||
| if (!tensor) { | |||
| // tensor was not found, creating empty tensor summary. | |||
| if (!tensor || cur_root_graph_id_ != tensor->GetRootGraphId()) { | |||
| // tensor was not found or tensor's graph was not executed in the current step, creating empty tensor summary. | |||
| DebugServices::TensorStat tensor_stat; | |||
| AddTensorStatInfo(tensor_stat, &tensor_summary_list); | |||
| continue; | |||
| @@ -1326,7 +1378,7 @@ bool Debugger::CheckIp(const std::string &host) const { | |||
| uint32_t Debugger::GetFirstRunGraphId() const { return rungraph_id_list_.front(); } | |||
| void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index) { | |||
| void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, uint32_t root_graph_id) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) { | |||
| return; | |||
| @@ -1362,7 +1414,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output | |||
| } else { | |||
| keep_prev = false; | |||
| } | |||
| bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev); | |||
| bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev, root_graph_id); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "LoadMemToHost:" | |||
| << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; | |||
| @@ -1374,35 +1426,36 @@ void Debugger::LoadParametersAndConst() { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr_); | |||
| // load parameters | |||
| MS_LOG(INFO) << "Start to load Parameters for graph " << graph_ptr_->graph_id() << "."; | |||
| auto root_graph_id = graph_ptr_->root_graph_id(); | |||
| const auto ¶meters = graph_ptr_->inputs(); | |||
| for (auto &item : parameters) { | |||
| LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX); | |||
| LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX, root_graph_id); | |||
| } | |||
| // load value nodes | |||
| // get all constant values from the graph | |||
| MS_LOG(INFO) << "Start to load value nodes for graph " << graph_ptr_->graph_id() << "."; | |||
| const auto value_nodes = graph_ptr_->graph_value_nodes(); | |||
| for (auto &item : value_nodes) { | |||
| LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX); | |||
| LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX, root_graph_id); | |||
| } | |||
| } | |||
| void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) { | |||
| if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return; | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(graph_ptr_); | |||
| // load parameters | |||
| MS_LOG(INFO) << "Start to load Parameters for graph " << graph->graph_id() << "."; | |||
| const auto ¶meters = graph_ptr_->inputs(); | |||
| auto root_graph_id = graph->root_graph_id(); | |||
| const auto ¶meters = graph->inputs(); | |||
| for (auto &item : parameters) { | |||
| LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX); | |||
| LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX, root_graph_id); | |||
| } | |||
| // load value nodes | |||
| // get all constant values from the graph | |||
| MS_LOG(INFO) << "Start to load value nodes for graph " << graph->graph_id() << "."; | |||
| const auto value_nodes = graph_ptr_->graph_value_nodes(); | |||
| const auto value_nodes = graph->graph_value_nodes(); | |||
| for (auto &item : value_nodes) { | |||
| LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX); | |||
| LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX, root_graph_id); | |||
| } | |||
| } | |||
| @@ -1410,6 +1463,7 @@ void Debugger::LoadGraphOutputs() { | |||
| if (!(debugger_enabled() && device_target_ == kAscendDevice)) return; | |||
| MS_EXCEPTION_IF_NULL(graph_ptr_); | |||
| const auto &apply_kernels = graph_ptr_->execution_order(); | |||
| auto root_graph_id = graph_ptr_->root_graph_id(); | |||
| // for kernels, execution order starts from 1 | |||
| int exec_order = 1; | |||
| for (const auto &node : apply_kernels) { | |||
| @@ -1435,7 +1489,7 @@ void Debugger::LoadGraphOutputs() { | |||
| auto format = kOpFormat_DEFAULT; | |||
| string tensor_name = kernel_name + ':' + std::to_string(j); | |||
| ShapeVector int_shapes = trans::GetRuntimePaddingShape(node, j); | |||
| auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false); | |||
| auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "LoadMemToHost:" | |||
| << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; | |||
| @@ -1463,6 +1517,7 @@ void Debugger::UpdateStepNumGPU() { | |||
| // access lock for public method | |||
| std::lock_guard<std::mutex> a_lock(access_lock_); | |||
| ++num_step_; | |||
| MS_LOG(DEBUG) << "Update step for GPU, current step: " << num_step_; | |||
| } | |||
| } | |||
| @@ -80,6 +80,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| // do nothing if graph is set already | |||
| void PreExecute(const KernelGraphPtr &graph_ptr); | |||
| void SetCurrentAndPrevRootGraph(uint32_t root_graph_id); | |||
| void StoreRunGraphIdList(uint32_t graph_id); | |||
| // analyze tensors and wait for command | |||
| // don't need a graph_ptr because it is saved during pre_execute | |||
| void PostExecute(); | |||
| @@ -131,6 +135,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| // version_check should be true if you want the function to do backend compatibility check with Mindinsight | |||
| bool SendMetadata(bool version_check); | |||
| bool CheckSendMetadata(); | |||
| void LoadParametersAndConst(); | |||
| void LoadParametersAndConst(const KernelGraphPtr &graph); | |||
| @@ -149,6 +155,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| uint32_t GetFirstRunGraphId() const; | |||
| uint32_t GetCurrentRootGraphId() const { return cur_root_graph_id_; } | |||
| uint32_t GetPrevRootGraphId() const { return prev_root_graph_id_; } | |||
| void SetGraphPtr(const KernelGraphPtr &graph_ptr) { graph_ptr_ = graph_ptr; } | |||
| const KernelGraphPtr GetGraphPtr() const { return graph_ptr_; } | |||
| @@ -246,7 +256,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| // Check if the IP is valid | |||
| bool CheckIp(const std::string &host) const; | |||
| void LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index); | |||
| void LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, uint32_t root_graph_id); | |||
| // class members | |||
| @@ -263,9 +273,13 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| std::string node_name_; | |||
| std::string cur_name_; | |||
| bool training_done_; | |||
| bool send_metadata_done_; | |||
| bool received_new_graph_; | |||
| bool is_dataset_graph_; | |||
| bool partial_memory_; | |||
| std::mutex access_lock_; | |||
| uint32_t cur_root_graph_id_ = UINT32_MAX; | |||
| uint32_t prev_root_graph_id_ = UINT32_MAX; | |||
| // flag to keep track of the very first suspension of debugger | |||
| bool initial_suspend_; | |||
| bool enable_heartbeat_; | |||
| @@ -52,7 +52,8 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t & | |||
| return real_outputs; | |||
| } | |||
| void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) { | |||
| void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_, | |||
| uint32_t root_graph_id) { | |||
| // get inputs | |||
| auto kernel_inputs = launch_info_->inputs_; | |||
| auto input_size = AnfAlgo::GetInputTensorNum(cnode); | |||
| @@ -70,7 +71,8 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uin | |||
| auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type); | |||
| string input_tensor_name = input_kernel_name + ':' + "0"; | |||
| ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX); | |||
| auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true); | |||
| auto ret = | |||
| gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true, root_graph_id); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "LoadMemToHost:" | |||
| << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!"; | |||
| @@ -79,7 +81,8 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uin | |||
| } | |||
| } | |||
| void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) { | |||
| void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_, | |||
| uint32_t root_graph_id) { | |||
| // get outputs | |||
| auto kernel_outputs = launch_info_->outputs_; | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | |||
| @@ -99,7 +102,7 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, ui | |||
| auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type); | |||
| string tensor_name = kernel_name + ':' + std::to_string(j); | |||
| ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j); | |||
| auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false); | |||
| auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false, root_graph_id); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "LoadMemToHost:" | |||
| << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; | |||
| @@ -136,15 +139,17 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_ | |||
| } | |||
| auto &dump_json_parser = DumpJsonParser::GetInstance(); | |||
| bool dump_enabled = debugger->DumpDataEnabledIteration(); | |||
| auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph()); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto root_graph_id = kernel_graph->root_graph_id(); | |||
| if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) { | |||
| LoadInputs(cnode, launch_info_, exec_order_); | |||
| LoadInputs(cnode, launch_info_, exec_order_, root_graph_id); | |||
| } | |||
| if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) { | |||
| LoadOutputs(cnode, launch_info_, exec_order_); | |||
| LoadOutputs(cnode, launch_info_, exec_order_, root_graph_id); | |||
| } | |||
| // Dump kernel | |||
| if (dump_enabled) { | |||
| auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph()); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto graph_id = kernel_graph->graph_id(); | |||
| debugger->DumpSingleNode(cnode, graph_id); | |||
| @@ -26,9 +26,11 @@ namespace mindspore { | |||
| std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size); | |||
| void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_); | |||
| void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_, | |||
| uint32_t root_graph_id); | |||
| void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_); | |||
| void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_, | |||
| uint32_t root_graph_id); | |||
| bool CheckReadData(const CNodePtr &cnode); | |||
| @@ -589,8 +589,8 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std:: | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, | |||
| bool keep_prev) const { | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev, | |||
| uint32_t root_graph_id) const { | |||
| bool ret = false; | |||
| auto debugger = Debugger::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(debugger); | |||
| @@ -619,6 +619,7 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||
| tensor_data->SetByteSize(LongToSize(out_tensor->data().nbytes())); | |||
| tensor_data->SetType((unsigned int)host_type); | |||
| tensor_data->SetShape(out_tensor->shape()); | |||
| tensor_data->SetRootGraphId(root_graph_id); | |||
| ret = debugger->LoadNewTensor(tensor_data, keep_prev); | |||
| return ret; | |||
| } | |||
| @@ -62,7 +62,8 @@ class AscendDeviceAddress : public DeviceAddress { | |||
| #endif | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override; | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev, | |||
| uint32_t root_graph_id = 0) const override; | |||
| #endif | |||
| private: | |||
| @@ -118,7 +118,8 @@ class DeviceAddress : public mindspore::DeviceSync { | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| virtual bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const { | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev, | |||
| uint32_t root_graph_id = 0) const { | |||
| return true; | |||
| } | |||
| #endif | |||
| @@ -141,8 +141,8 @@ GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); } | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, | |||
| bool keep_prev) const { | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev, | |||
| uint32_t root_graph_id) const { | |||
| bool ret = false; | |||
| if (size_ == 0) { | |||
| return true; | |||
| @@ -171,6 +171,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi | |||
| tensor_data->SetByteSize(out_tensor->data().nbytes()); | |||
| tensor_data->SetType((unsigned int)host_type); | |||
| tensor_data->SetShape(out_tensor->shape()); | |||
| tensor_data->SetRootGraphId(root_graph_id); | |||
| ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev); | |||
| MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | |||
| return ret; | |||
| @@ -54,7 +54,8 @@ class GPUDeviceAddress : public DeviceAddress { | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override; | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev, | |||
| uint32_t root_graph_id = 0) const override; | |||
| #endif | |||
| private: | |||
| @@ -114,10 +114,10 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const | |||
| #ifdef ENABLE_DEBUGGER | |||
| auto debugger = Debugger::GetInstance(); | |||
| if (debugger != nullptr) { | |||
| debugger->Debugger::UpdateStepNumGPU(); | |||
| // Reset exec_order for the next step | |||
| exec_order_ = 0; | |||
| debugger->Debugger::PostExecuteGraphDebugger(); | |||
| debugger->Debugger::UpdateStepNumGPU(); | |||
| } | |||
| #else | |||
| #ifndef ENABLE_SECURITY | |||
| @@ -324,6 +324,7 @@ GraphId GraphCompiler::CompileGraph(const AnfNodePtrList &nodes, const AnfNodePt | |||
| auto backend_node = graph->output(); | |||
| MS_EXCEPTION_IF_NULL(backend_node); | |||
| graph->CacheGraphOutputToFrontNodeWithIndex({backend_node}, outputs); | |||
| graph->set_root_graph_id(graph_id); | |||
| return graph_id; | |||
| } | |||