| @@ -1003,18 +1003,9 @@ void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs) | |||||
| void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const { | void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const { | ||||
| MS_LOG(INFO) << "Start!"; | MS_LOG(INFO) << "Start!"; | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| if (debugger_->DebuggerBackendEnabled()) { | |||||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||||
| // TensorData will be freed up here | |||||
| debugger_->EmptyTensor(); | |||||
| uint32_t iter_num = debugger_->GetTensorLoaderIterNum(); | |||||
| debugger_->SetTensorLoaderIterNum(++iter_num); | |||||
| (void)runtime_instance->LoadData(kernel_graph.get()); | |||||
| debugger_->EmptyPrevTensor(); | |||||
| } | |||||
| #endif | |||||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||||
| (void)runtime_instance->LoadData(kernel_graph.get()); | |||||
| MS_LOG(INFO) << "Finish!"; | MS_LOG(INFO) << "Finish!"; | ||||
| } | } | ||||
| @@ -360,7 +360,9 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor: | |||||
| SyncValueNodeDeviceAddr(kernel_graph); | SyncValueNodeDeviceAddr(kernel_graph); | ||||
| // Load input data from user input | // Load input data from user input | ||||
| LoadInputData(kernel_graph, inputs); | LoadInputData(kernel_graph, inputs); | ||||
| PreIterationDbg(kernel_graph); | |||||
| if (debugger_) { | |||||
| debugger_->PreExecute(kernel_graph, graph_sum_); | |||||
| } | |||||
| #if ENABLE_CPU && ENABLE_GPU | #if ENABLE_CPU && ENABLE_GPU | ||||
| // Initialize parameter server | // Initialize parameter server | ||||
| InitPSParamAndOptim(kernel_graph, inputs); | InitPSParamAndOptim(kernel_graph, inputs); | ||||
| @@ -372,7 +374,6 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor: | |||||
| for (int64_t i = 0; i < loopsize; i++) { | for (int64_t i = 0; i < loopsize; i++) { | ||||
| Execute(kernel_graph); | Execute(kernel_graph); | ||||
| } | } | ||||
| PostLoadTensor(kernel_graph); | |||||
| // In pynative mode, device addresses of tensors in value nodes need be clean. | // In pynative mode, device addresses of tensors in value nodes need be clean. | ||||
| CleanValueNodeDeviceAddr(kernel_graph); | CleanValueNodeDeviceAddr(kernel_graph); | ||||
| // Summary | // Summary | ||||
| @@ -443,13 +444,6 @@ bool GPUSession::DumpDataEnabledIteration() const { | |||||
| return runtime_instance->DumpDataEnabledIteration(); | return runtime_instance->DumpDataEnabledIteration(); | ||||
| } | } | ||||
| void GPUSession::PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||||
| if (debugger_) { | |||||
| debugger_->PreExecute(kernel_graph, graph_sum_); | |||||
| } | |||||
| PreLoadTensor(kernel_graph); | |||||
| } | |||||
| void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const { | void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const { | ||||
| bool dump_enabled = DumpDataEnabledIteration(); | bool dump_enabled = DumpDataEnabledIteration(); | ||||
| // debug used for dump | // debug used for dump | ||||
| @@ -463,30 +457,6 @@ void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_gra | |||||
| } | } | ||||
| } | } | ||||
| void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||||
| bool dump_enabled = DumpDataEnabledIteration(); | |||||
| if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) { | |||||
| return; | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | |||||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||||
| debugger_->EmptyTensor(); | |||||
| uint32_t iter_num = debugger_->GetTensorLoaderIterNum(); | |||||
| debugger_->SetTensorLoaderIterNum(++iter_num); | |||||
| } | |||||
| void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||||
| bool dump_enabled = DumpDataEnabledIteration(); | |||||
| if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) { | |||||
| return; | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | |||||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||||
| debugger_->EmptyPrevTensor(); | |||||
| } | |||||
| void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const { | void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const { | ||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| @@ -75,14 +75,8 @@ class GPUSession : public SessionBasic { | |||||
| bool DumpDataEnabledIteration() const; | bool DumpDataEnabledIteration() const; | ||||
| void PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||||
| void PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||||
| void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||||
| void SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| void CleanValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void CleanValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| @@ -66,7 +66,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector | |||||
| std::vector<std::vector<parameter_t>> *parameters, | std::vector<std::vector<parameter_t>> *parameters, | ||||
| std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows, | std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows, | ||||
| const std::vector<std::shared_ptr<TensorData>> &tensor_list, | const std::vector<std::shared_ptr<TensorData>> &tensor_list, | ||||
| const bool init_dbg_suspend) { | |||||
| const bool init_dbg_suspend, const bool step_end, const bool recheck) { | |||||
| std::lock_guard<std::mutex> lg(lock_); | std::lock_guard<std::mutex> lg(lock_); | ||||
| if (watchpoint_table.empty()) return; | if (watchpoint_table.empty()) return; | ||||
| @@ -75,13 +75,26 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector | |||||
| const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':')); | const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':')); | ||||
| const auto tensor_slot = std::to_string(tensor->GetSlot()); | const auto tensor_slot = std::to_string(tensor->GetSlot()); | ||||
| mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor(); | mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor(); | ||||
| // no elements to analyze | |||||
| if (tensor_ptr->DataSize() == 0) continue; | |||||
| int tensor_dtype = tensor_ptr->data_type_c(); | int tensor_dtype = tensor_ptr->data_type_c(); | ||||
| std::vector<watchpoint_t> watchpoints_to_check; | std::vector<watchpoint_t> watchpoints_to_check; | ||||
| std::string qualified_tensor_name; | std::string qualified_tensor_name; | ||||
| for (auto w_table_item : watchpoint_table) { | for (auto w_table_item : watchpoint_table) { | ||||
| auto wp = std::get<1>(w_table_item); | auto wp = std::get<1>(w_table_item); | ||||
| if (wp.condition.type == INIT && !init_dbg_suspend) continue; | |||||
| // check ONLY init conditions in the initial suspended state. | |||||
| // skip all other conditions in the initial suspended state. | |||||
| // skip init conditions in every other state. | |||||
| if ((wp.condition.type == INIT) ^ init_dbg_suspend) continue; | |||||
| if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue; | if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue; | ||||
| // check change conditions only on step end. | |||||
| if (wp.change_condition() && !step_end) continue; | |||||
| // if recheck, ignore the cache results and reanalyze everything. | |||||
| // if not a recheck, check only unanalyzed tensors | |||||
| if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue; | |||||
| std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot); | std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot); | ||||
| if (!found.empty()) { | if (!found.empty()) { | ||||
| qualified_tensor_name = found; | qualified_tensor_name = found; | ||||
| @@ -174,6 +187,10 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector | |||||
| error_code = std::get<1>(item); | error_code = std::get<1>(item); | ||||
| parameter_list = std::get<2>(item); | parameter_list = std::get<2>(item); | ||||
| } | } | ||||
| // add analyzed tensor to cache | |||||
| if (!recheck) { | |||||
| wp_id_cache[tensor_name].insert(wp.id); | |||||
| } | |||||
| if (is_hit || error_code) { | if (is_hit || error_code) { | ||||
| name->push_back(qualified_tensor_name); | name->push_back(qualified_tensor_name); | ||||
| @@ -238,28 +255,6 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode | |||||
| } | } | ||||
| } | } | ||||
| void DebugServices::AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, | |||||
| const CNodePtr &kernel) { | |||||
| if (kernel) { | |||||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||||
| for (size_t j = 0; j < input_size; ++j) { | |||||
| auto input_kernel = kernel->input(j + 1); | |||||
| std::string input_kernel_name = input_kernel->fullname_with_scope(); | |||||
| auto found_dot = input_kernel_name.find_last_of('.'); | |||||
| if (found_dot != std::string::npos && | |||||
| (input_kernel_name.substr(found_dot + 1) == "weight" || input_kernel_name.substr(found_dot + 1) == "bias")) { | |||||
| std::string locate_tensor = input_kernel_name + ":0"; | |||||
| std::map<std::string, std::shared_ptr<TensorData>> tensor_map = tensor_loader_->GetTensorMap(); | |||||
| std::map<std::string, std::shared_ptr<TensorData>>::iterator iter; | |||||
| iter = tensor_map.find(locate_tensor); | |||||
| if (iter != tensor_map.end()) { | |||||
| tensor_list->push_back(iter->second); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); } | void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); } | ||||
| std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); } | std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); } | ||||
| @@ -292,4 +287,32 @@ std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::Get | |||||
| return watchpoint_table; | return watchpoint_table; | ||||
| } | } | ||||
| void DebugServices::ResetLoadedTensors() { | |||||
| wp_id_cache.clear(); | |||||
| MS_LOG(INFO) << "Resetting loaded tensors"; | |||||
| tensor_loader_->MoveParametersCurrentToPrev(); | |||||
| tensor_loader_->EmptyCurrentTensor(); | |||||
| // will move parameters from previous to current map | |||||
| tensor_loader_->SwapCurrentPrev(); | |||||
| } | |||||
| std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNodePtr &kernel) { | |||||
| MS_EXCEPTION_IF_NULL(kernel); | |||||
| std::vector<std::shared_ptr<TensorData>> result; | |||||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | |||||
| auto kernel_name = kernel->fullname_with_scope(); | |||||
| for (size_t j = 0; j < output_size; ++j) { | |||||
| auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j); | |||||
| auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot); | |||||
| if (tensor) result.push_back(tensor); | |||||
| } | |||||
| return result; | |||||
| } | |||||
| bool DebugServices::TensorExistsInCurrent(std::string tensor_name) { | |||||
| return tensor_loader_->TensorExistsInCurrent(tensor_name); | |||||
| } | |||||
| void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) { | |||||
| tensor_loader_->MoveTensorCurrentToPrev(tensor_name); | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <tuple> | #include <tuple> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <set> | |||||
| #include <mutex> | #include <mutex> | ||||
| #include <map> | #include <map> | ||||
| #include <limits> | #include <limits> | ||||
| @@ -160,6 +161,10 @@ class DebugServices { | |||||
| bool range_enabled() const { | bool range_enabled() const { | ||||
| return condition.type == RANGE && (!parameter_list[0].disabled || !parameter_list[1].disabled); | return condition.type == RANGE && (!parameter_list[0].disabled || !parameter_list[1].disabled); | ||||
| } | } | ||||
| bool change_condition() const { | |||||
| return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL || condition.type == NOT_CHANGED; | |||||
| } | |||||
| } watchpoint_t; | } watchpoint_t; | ||||
| void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter, | void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter, | ||||
| @@ -171,7 +176,8 @@ class DebugServices { | |||||
| void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition, | void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition, | ||||
| std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters, | std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters, | ||||
| std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows, | std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows, | ||||
| const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend); | |||||
| const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend, | |||||
| const bool step_end, const bool recheck); | |||||
| void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | ||||
| std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | ||||
| @@ -181,8 +187,6 @@ class DebugServices { | |||||
| bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const; | bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const; | ||||
| void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel); | |||||
| void EmptyTensor(); | void EmptyTensor(); | ||||
| std::vector<std::shared_ptr<TensorData>> GetTensor() const; | std::vector<std::shared_ptr<TensorData>> GetTensor() const; | ||||
| @@ -205,9 +209,19 @@ class DebugServices { | |||||
| std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable(); | std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable(); | ||||
| void ResetLoadedTensors(); | |||||
| std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel); | |||||
| bool TensorExistsInCurrent(std::string tensor_name); | |||||
| void MoveTensorCurrentToPrev(std::string tensor_name); | |||||
| private: | private: | ||||
| std::mutex lock_; | std::mutex lock_; | ||||
| // to keep track of watchpoints that have been checked already for a tensor in current step | |||||
| std::unordered_map<std::string, std::set<int32_t>> wp_id_cache; | |||||
| std::unordered_map<unsigned int, watchpoint_t> watchpoint_table; | std::unordered_map<unsigned int, watchpoint_t> watchpoint_table; | ||||
| TensorLoader *tensor_loader_; | TensorLoader *tensor_loader_; | ||||
| @@ -313,20 +313,16 @@ void Debugger::PostExecute() { | |||||
| } | } | ||||
| if (debugger_->DebuggerBackendEnabled()) { | if (debugger_->DebuggerBackendEnabled()) { | ||||
| // analyze tensor data and send the watchpoints been hit | // analyze tensor data and send the watchpoints been hit | ||||
| if (run_level_ == "node") { | |||||
| MS_LOG(INFO) << "Debugger is in node level mode "; | |||||
| return; | |||||
| } | |||||
| if (debugger_enabled_ && !is_dataset_graph_) { | if (debugger_enabled_ && !is_dataset_graph_) { | ||||
| if (device_target_ != kGPUDevice) { | if (device_target_ != kGPUDevice) { | ||||
| num_step_++; | num_step_++; | ||||
| MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_; | |||||
| SendWatchpoints(CheckWatchpoints()); | |||||
| CommandLoop(); | |||||
| } else { | |||||
| CommandLoop(); | |||||
| } | } | ||||
| MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_; | |||||
| SendWatchpoints(CheckWatchpoints()); | |||||
| CommandLoop(); | |||||
| } | } | ||||
| // Only keep parameters in the current map | |||||
| debug_services_->ResetLoadedTensors(); | |||||
| } | } | ||||
| } | } | ||||
| @@ -596,7 +592,7 @@ void Debugger::CommandLoop() { | |||||
| MS_LOG(INFO) << "RunCMD"; | MS_LOG(INFO) << "RunCMD"; | ||||
| if (GetRunLevel(reply) == "recheck") { | if (GetRunLevel(reply) == "recheck") { | ||||
| MS_LOG(INFO) << "rechecking all watchpoints"; | MS_LOG(INFO) << "rechecking all watchpoints"; | ||||
| SendWatchpoints(CheckWatchpoints()); | |||||
| SendWatchpoints(CheckWatchpoints("", nullptr, true)); | |||||
| } else { | } else { | ||||
| // no longer the initial suspension. | // no longer the initial suspension. | ||||
| initial_suspend_ = false; | initial_suspend_ = false; | ||||
| @@ -705,9 +701,6 @@ void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCon | |||||
| return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()}; | return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()}; | ||||
| }); | }); | ||||
| debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list); | debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list); | ||||
| if (initial_suspend_ && | |||||
| static_cast<DebugServices::CONDITION_TYPE>(condition.condition()) == DebugServices::CONDITION_TYPE::INIT) | |||||
| SendWatchpoints(CheckWatchpoints()); | |||||
| } | } | ||||
| void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); } | void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); } | ||||
| @@ -780,7 +773,8 @@ void Debugger::Exit() { | |||||
| } | } | ||||
| } | } | ||||
| std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel) { | |||||
| std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel, | |||||
| bool recheck) { | |||||
| std::vector<std::string> name; | std::vector<std::string> name; | ||||
| std::vector<std::string> slot; | std::vector<std::string> slot; | ||||
| std::vector<int> condition; | std::vector<int> condition; | ||||
| @@ -795,11 +789,10 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode | |||||
| if (watchnode.empty()) { | if (watchnode.empty()) { | ||||
| tensor_list = debug_services_->GetTensor(); | tensor_list = debug_services_->GetTensor(); | ||||
| } else { | } else { | ||||
| tensor_list = debug_services_->GetNodeTensorMap(watchnode); | |||||
| debug_services_->AddWeightsBiasInputs(&tensor_list, kernel); | |||||
| tensor_list = debug_services_->GetNodeTensor(kernel); | |||||
| } | } | ||||
| debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops, | debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops, | ||||
| tensor_list, initial_suspend_); | |||||
| tensor_list, initial_suspend_, watchnode.empty(), recheck); | |||||
| std::list<WatchpointHit> hits; | std::list<WatchpointHit> hits; | ||||
| for (unsigned int i = 0; i < name.size(); i++) { | for (unsigned int i = 0; i < name.size(); i++) { | ||||
| WatchpointHit hit; | WatchpointHit hit; | ||||
| @@ -1045,7 +1038,7 @@ std::vector<std::string> Debugger::CheckOpOverflow() { | |||||
| } | } | ||||
| closedir(d); | closedir(d); | ||||
| if (op_names.size()) { | |||||
| if (!op_names.empty()) { | |||||
| MS_LOG(ERROR) << "These operation overflows are detected " << op_names; | MS_LOG(ERROR) << "These operation overflows are detected " << op_names; | ||||
| } | } | ||||
| @@ -1091,12 +1084,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output | |||||
| if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) { | if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) { | ||||
| return; | return; | ||||
| } | } | ||||
| bool keep_prev; | |||||
| if (anf_node->isa<Parameter>()) { | |||||
| keep_prev = true; | |||||
| } else { | |||||
| keep_prev = false; | |||||
| } | |||||
| // for parameters and value nodes, set its execution order to be 0; | // for parameters and value nodes, set its execution order to be 0; | ||||
| int exec_order = 0; | int exec_order = 0; | ||||
| std::string node_name = anf_node->fullname_with_scope(); | std::string node_name = anf_node->fullname_with_scope(); | ||||
| @@ -1114,6 +1101,13 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output | |||||
| auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index); | auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index); | ||||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | ||||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | [](size_t inner_item) { return SizeToInt(inner_item); }); | ||||
| bool keep_prev; | |||||
| if (anf_node->isa<Parameter>()) { | |||||
| keep_prev = true; | |||||
| debug_services_->MoveTensorCurrentToPrev(tensor_name); | |||||
| } else { | |||||
| keep_prev = false; | |||||
| } | |||||
| bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev); | bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev); | ||||
| if (!ret) { | if (!ret) { | ||||
| MS_LOG(ERROR) << "LoadMemToHost:" | MS_LOG(ERROR) << "LoadMemToHost:" | ||||
| @@ -1123,9 +1117,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output | |||||
| void Debugger::LoadParametersAndConst() { | void Debugger::LoadParametersAndConst() { | ||||
| if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return; | if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return; | ||||
| if (!(num_step_ == 0 || device_target_ == kAscendDevice || | |||||
| (device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration()))) | |||||
| return; | |||||
| MS_EXCEPTION_IF_NULL(graph_ptr_); | MS_EXCEPTION_IF_NULL(graph_ptr_); | ||||
| // load parameters | // load parameters | ||||
| MS_LOG(INFO) << "Start to load Parameters!"; | MS_LOG(INFO) << "Start to load Parameters!"; | ||||
| @@ -1199,5 +1190,8 @@ void Debugger::ClearCurrentData() { | |||||
| if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration())) | if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration())) | ||||
| debug_services_->EmptyCurrentTensor(); | debug_services_->EmptyCurrentTensor(); | ||||
| } | } | ||||
| bool Debugger::TensorExistsInCurrent(std::string tensor_name) { | |||||
| return debug_services_->TensorExistsInCurrent(tensor_name); | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -145,6 +145,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||||
| std::list<KernelGraphPtr> GetGraphPtrList() { return graph_ptr_list_; } | std::list<KernelGraphPtr> GetGraphPtrList() { return graph_ptr_list_; } | ||||
| bool TensorExistsInCurrent(std::string tensor_name); | |||||
| private: | private: | ||||
| // private constructor for singleton | // private constructor for singleton | ||||
| Debugger(); | Debugger(); | ||||
| @@ -197,7 +199,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||||
| // analyze tensors and check watchpoint conditions | // analyze tensors and check watchpoint conditions | ||||
| // return names of tensors and what condition they hit | // return names of tensors and what condition they hit | ||||
| std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string(), | std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string(), | ||||
| const CNodePtr &kernel = NULL); | |||||
| const CNodePtr &kernel = nullptr, bool recheck = false); | |||||
| // send watchpoints that hit | // send watchpoints that hit | ||||
| void SendWatchpoints(const std::list<WatchpointHit> &points); | void SendWatchpoints(const std::list<WatchpointHit> &points); | ||||
| @@ -33,6 +33,44 @@ class TensorLoader { | |||||
| ~TensorLoader() { EmptyTensor(); } | ~TensorLoader() { EmptyTensor(); } | ||||
| void MoveTensorCurrentToPrev(std::string tensor_name) { | |||||
| auto handle = tensor_list_map.extract(tensor_name); | |||||
| if (!handle.empty()) { | |||||
| MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map"; | |||||
| prev_tensor_list_map.insert(std::move(handle)); | |||||
| } | |||||
| } | |||||
| void SwapCurrentPrev() { tensor_list_map.swap(prev_tensor_list_map); } | |||||
| bool TensorExistsInCurrent(std::string tensor_name) { | |||||
| return tensor_list_map.find(tensor_name) != tensor_list_map.end(); | |||||
| } | |||||
| // only parameters will return true | |||||
| bool PrevTensorExistsInCurrent(std::string tensor_name) { return TensorExistsInCurrent(tensor_name + ":prev"); } | |||||
| void MoveParametersCurrentToPrev() { | |||||
| MS_LOG(INFO) << "Moving parameters from current map to previous map"; | |||||
| auto iter = tensor_list_map.begin(); | |||||
| while (iter != tensor_list_map.end()) { | |||||
| auto key = iter->first; | |||||
| if (PrevTensorExistsInCurrent(key)) { | |||||
| // :prev tensor only exists for parameter. Move it to prev | |||||
| ++iter; | |||||
| MoveTensorCurrentToPrev(key); | |||||
| } else { | |||||
| ++iter; | |||||
| } | |||||
| } | |||||
| } | |||||
// True when `tensor_name` ends with ":prev" and has at least one character before it.
// A bare ":prev" (or anything shorter) is not treated as a previous-tensor name.
bool IsPrevTensor(std::string tensor_name) {
  const std::string marker = ":prev";
  const auto name_len = tensor_name.size();
  const auto marker_len = marker.size();
  if (name_len <= marker_len) {
    return false;
  }
  return tensor_name.compare(name_len - marker_len, marker_len, marker) == 0;
}
| bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) { | bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) { | ||||
| std::lock_guard<std::mutex> lg(lock_); | std::lock_guard<std::mutex> lg(lock_); | ||||
| if (keep_prev) { | if (keep_prev) { | ||||
| @@ -43,20 +81,32 @@ class TensorLoader { | |||||
| tensor_list_map.insert(std::move(handle)); | tensor_list_map.insert(std::move(handle)); | ||||
| } | } | ||||
| } | } | ||||
| tensor_list.push_back(tensor); | |||||
| tensor_list_map[tensor->GetName()] = tensor; // use [] instead of insert to ensure latest value | tensor_list_map[tensor->GetName()] = tensor; // use [] instead of insert to ensure latest value | ||||
| auto node_name = tensor->GetName(); | auto node_name = tensor->GetName(); | ||||
| node_name = node_name.substr(0, node_name.find_first_of(":")); | node_name = node_name.substr(0, node_name.find_first_of(":")); | ||||
| node_tensor_map.insert({node_name, tensor}); | node_tensor_map.insert({node_name, tensor}); | ||||
| return true; | return true; | ||||
| } | } | ||||
| std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; } | |||||
| std::vector<std::shared_ptr<TensorData>> GetTensor() { | |||||
| std::vector<std::shared_ptr<TensorData>> tensor_list; | |||||
| for (auto &it : tensor_list_map) { | |||||
| if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second); | |||||
| } | |||||
| return tensor_list; | |||||
| } | |||||
| std::shared_ptr<TensorData> GetTensor(const std::string &tensor_name) { | |||||
| auto iter = tensor_list_map.find(tensor_name); | |||||
| if (iter != tensor_list_map.end()) return iter->second; | |||||
| return nullptr; | |||||
| } | |||||
| uint32_t GetIterNum() { return iter_num; } | uint32_t GetIterNum() { return iter_num; } | ||||
| std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; } | std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; } | ||||
| std::shared_ptr<TensorData> GetPrevTensor(std::string tensor_name) { | |||||
| std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) { | |||||
| if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) { | if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) { | ||||
| return tensor_list_map[tensor_name + ":prev"]; | return tensor_list_map[tensor_name + ":prev"]; | ||||
| } | } | ||||
| @@ -91,14 +141,13 @@ class TensorLoader { | |||||
| prev_tensor_list_map.clear(); | prev_tensor_list_map.clear(); | ||||
| node_tensor_map.clear(); | node_tensor_map.clear(); | ||||
| tensor_list_map.swap(prev_tensor_list_map); | tensor_list_map.swap(prev_tensor_list_map); | ||||
| tensor_list.clear(); | |||||
| } | } | ||||
| void EmptyPrevTensor() { prev_tensor_list_map.clear(); } | void EmptyPrevTensor() { prev_tensor_list_map.clear(); } | ||||
| void EmptyCurrentTensor() { | void EmptyCurrentTensor() { | ||||
| tensor_list_map.clear(); | tensor_list_map.clear(); | ||||
| tensor_list.clear(); | |||||
| node_tensor_map.clear(); | |||||
| } | } | ||||
| void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } | void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } | ||||
| @@ -142,7 +191,6 @@ class TensorLoader { | |||||
| } | } | ||||
| private: | private: | ||||
| std::vector<std::shared_ptr<TensorData>> tensor_list; | |||||
| std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map; | std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map; | ||||
| std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map; | std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map; | ||||
| std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map; | std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map; | ||||
| @@ -674,6 +674,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||||
| const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type, | const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type, | ||||
| size_t slot, bool keep_prev) const { | size_t slot, bool keep_prev) const { | ||||
| bool ret = false; | bool ret = false; | ||||
| if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) { | |||||
| MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again."; | |||||
| return true; | |||||
| } | |||||
| // TensorData is freed up in AscendSession class | // TensorData is freed up in AscendSession class | ||||
| auto tensor_data = std::make_shared<mindspore::TensorData>(); | auto tensor_data = std::make_shared<mindspore::TensorData>(); | ||||
| tensor_data->SetName(tensor_name); | tensor_data->SetName(tensor_name); | ||||
| @@ -296,8 +296,6 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) { | |||||
| MS_EXCEPTION_IF_NULL(graph); | MS_EXCEPTION_IF_NULL(graph); | ||||
| #ifdef ENABLE_DEBUGGER | #ifdef ENABLE_DEBUGGER | ||||
| MS_LOG(INFO) << "Start load step"; | MS_LOG(INFO) << "Start load step"; | ||||
| uint32_t cur_iter = 0; | |||||
| MS_LOG(INFO) << "Cur iter is " << cur_iter; | |||||
| for (auto graph_ptr : debugger_->GetGraphPtrList()) { | for (auto graph_ptr : debugger_->GetGraphPtrList()) { | ||||
| debugger_->SetGraphPtr(graph_ptr); | debugger_->SetGraphPtr(graph_ptr); | ||||
| // load output | // load output | ||||
| @@ -87,6 +87,11 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi | |||||
| return true; | return true; | ||||
| } | } | ||||
| if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) { | |||||
| MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again."; | |||||
| return true; | |||||
| } | |||||
| mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape); | mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape); | ||||
| size_t host_size = out_tensor->data().nbytes(); | size_t host_size = out_tensor->data().nbytes(); | ||||
| auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c()); | auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c()); | ||||
| @@ -154,8 +154,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | |||||
| std::vector<int> real_outputs; | std::vector<int> real_outputs; | ||||
| real_outputs = CheckRealOutput(node_name, output_size); | real_outputs = CheckRealOutput(node_name, output_size); | ||||
| for (std::vector<int>::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) { | |||||
| auto j = *it; | |||||
| for (int j : real_outputs) { | |||||
| auto addr = kernel_outputs[j]; | auto addr = kernel_outputs[j]; | ||||
| auto type = AnfAlgo::GetOutputInferDataType(kernel, j); | auto type = AnfAlgo::GetOutputInferDataType(kernel, j); | ||||
| auto format = kOpFormat_DEFAULT; | auto format = kOpFormat_DEFAULT; | ||||