@@ -1003,18 +1003,9 @@ void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs)
void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  MS_LOG(INFO) << "Start!";
  MS_EXCEPTION_IF_NULL(kernel_graph);
#ifdef ENABLE_DEBUGGER
  if (debugger_->DebuggerBackendEnabled()) {
    auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
    MS_EXCEPTION_IF_NULL(runtime_instance);
    // TensorData will be freed up here
    debugger_->EmptyTensor();
    uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
    debugger_->SetTensorLoaderIterNum(++iter_num);
    (void)runtime_instance->LoadData(kernel_graph.get());
    debugger_->EmptyPrevTensor();
  }
#endif
  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
  MS_EXCEPTION_IF_NULL(runtime_instance);
  (void)runtime_instance->LoadData(kernel_graph.get());
  MS_LOG(INFO) << "Finish!";
}

@@ -360,7 +360,9 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
  SyncValueNodeDeviceAddr(kernel_graph);
  // Load input data from user input
  LoadInputData(kernel_graph, inputs);
  PreIterationDbg(kernel_graph);
  if (debugger_) {
    debugger_->PreExecute(kernel_graph, graph_sum_);
  }
#if ENABLE_CPU && ENABLE_GPU
  // Initialize parameter server
  InitPSParamAndOptim(kernel_graph, inputs);

@@ -372,7 +374,6 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
  for (int64_t i = 0; i < loopsize; i++) {
    Execute(kernel_graph);
  }
  PostLoadTensor(kernel_graph);
  // In pynative mode, device addresses of tensors in value nodes need to be cleaned.
  CleanValueNodeDeviceAddr(kernel_graph);
  // Summary

@@ -443,13 +444,6 @@ bool GPUSession::DumpDataEnabledIteration() const {
  return runtime_instance->DumpDataEnabledIteration();
}

void GPUSession::PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  if (debugger_) {
    debugger_->PreExecute(kernel_graph, graph_sum_);
  }
  PreLoadTensor(kernel_graph);
}

void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  bool dump_enabled = DumpDataEnabledIteration();
  // debug used for dump

@@ -463,30 +457,6 @@ void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_gra
  }
}

void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  bool dump_enabled = DumpDataEnabledIteration();
  if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
    return;
  }
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
  MS_EXCEPTION_IF_NULL(runtime_instance);
  debugger_->EmptyTensor();
  uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
  debugger_->SetTensorLoaderIterNum(++iter_num);
}

void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  bool dump_enabled = DumpDataEnabledIteration();
  if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
    return;
  }
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
  MS_EXCEPTION_IF_NULL(runtime_instance);
  debugger_->EmptyPrevTensor();
}

void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const {
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);

@@ -75,14 +75,8 @@ class GPUSession : public SessionBasic {
  bool DumpDataEnabledIteration() const;
  void PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const;
  void PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const;
  void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
  void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
  void SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const;
  void CleanValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const;

@@ -66,7 +66,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
                                     std::vector<std::vector<parameter_t>> *parameters,
                                     std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows,
                                     const std::vector<std::shared_ptr<TensorData>> &tensor_list,
                                     const bool init_dbg_suspend) {
                                     const bool init_dbg_suspend, const bool step_end, const bool recheck) {
  std::lock_guard<std::mutex> lg(lock_);
  if (watchpoint_table.empty()) return;

@@ -75,13 +75,26 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
    const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':'));
    const auto tensor_slot = std::to_string(tensor->GetSlot());
    mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor();
    // no elements to analyze
    if (tensor_ptr->DataSize() == 0) continue;
    int tensor_dtype = tensor_ptr->data_type_c();
    std::vector<watchpoint_t> watchpoints_to_check;
    std::string qualified_tensor_name;
    for (auto w_table_item : watchpoint_table) {
      auto wp = std::get<1>(w_table_item);
      if (wp.condition.type == INIT && !init_dbg_suspend) continue;
      // check ONLY init conditions in the initial suspended state;
      // skip all other conditions in the initial suspended state,
      // and skip init conditions in every other state
      if ((wp.condition.type == INIT) ^ init_dbg_suspend) continue;
      if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue;
      // check change conditions only at step end
      if (wp.change_condition() && !step_end) continue;
      // on a recheck, ignore the cached results and reanalyze everything;
      // otherwise, check only tensors that have not been analyzed yet
      if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue;
      std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot);
      if (!found.empty()) {
        qualified_tensor_name = found;

@@ -174,6 +187,10 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
        error_code = std::get<1>(item);
        parameter_list = std::get<2>(item);
      }
      // add analyzed tensor to cache
      if (!recheck) {
        wp_id_cache[tensor_name].insert(wp.id);
      }
      if (is_hit || error_code) {
        name->push_back(qualified_tensor_name);
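
Review note: the three skip rules added above interact, and the `^` on the INIT check is easy to misread; since both operands are `bool`, it behaves as inequality, so INIT watchpoints run exactly during the initial suspension and at no other time. A minimal standalone model of just this predicate logic, using illustrative names (`SkipModel`, `MarkAnalyzed`) that are not part of the patch:

```cpp
#include <cstdint>
#include <set>
#include <string>
#include <unordered_map>

// Illustrative condition tags; the real enum lives in DebugServices.
enum ConditionType { INIT, IS_OVERFLOW, CHANGE_TOO_LARGE, CHANGE_TOO_SMALL, NOT_CHANGED, OTHER };

struct SkipModel {
  // per tensor, the watchpoint ids already analyzed this step (mirrors wp_id_cache)
  std::unordered_map<std::string, std::set<int32_t>> wp_id_cache;

  bool Skip(ConditionType type, int32_t wp_id, const std::string &tensor_name,
            bool init_dbg_suspend, bool step_end, bool recheck) {
    // (type == INIT) ^ init_dbg_suspend: skip when exactly one side is true,
    // i.e. INIT conditions run only during the initial suspension and the
    // initial suspension runs nothing but INIT conditions.
    if ((type == INIT) != init_dbg_suspend) return true;
    // change-based conditions compare against the previous step's value,
    // which only exists at step end
    bool change = type == CHANGE_TOO_LARGE || type == CHANGE_TOO_SMALL || type == NOT_CHANGED;
    if (change && !step_end) return true;
    // unless rechecking, a (tensor, watchpoint) pair is analyzed at most once per step
    if (!recheck && wp_id_cache[tensor_name].count(wp_id)) return true;
    return false;
  }

  void MarkAnalyzed(const std::string &tensor_name, int32_t wp_id) {
    wp_id_cache[tensor_name].insert(wp_id);
  }
};
```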
@@ -238,28 +255,6 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
  }
}

void DebugServices::AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list,
                                         const CNodePtr &kernel) {
  if (kernel) {
    auto input_size = AnfAlgo::GetInputTensorNum(kernel);
    for (size_t j = 0; j < input_size; ++j) {
      auto input_kernel = kernel->input(j + 1);
      std::string input_kernel_name = input_kernel->fullname_with_scope();
      auto found_dot = input_kernel_name.find_last_of('.');
      if (found_dot != std::string::npos &&
          (input_kernel_name.substr(found_dot + 1) == "weight" || input_kernel_name.substr(found_dot + 1) == "bias")) {
        std::string locate_tensor = input_kernel_name + ":0";
        std::map<std::string, std::shared_ptr<TensorData>> tensor_map = tensor_loader_->GetTensorMap();
        std::map<std::string, std::shared_ptr<TensorData>>::iterator iter;
        iter = tensor_map.find(locate_tensor);
        if (iter != tensor_map.end()) {
          tensor_list->push_back(iter->second);
        }
      }
    }
  }
}

void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }

std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); }

@@ -292,4 +287,32 @@ std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::Get
  return watchpoint_table;
}

void DebugServices::ResetLoadedTensors() {
  wp_id_cache.clear();
  MS_LOG(INFO) << "Resetting loaded tensors";
  tensor_loader_->MoveParametersCurrentToPrev();
  tensor_loader_->EmptyCurrentTensor();
  // will move parameters from previous to current map
  tensor_loader_->SwapCurrentPrev();
}

std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNodePtr &kernel) {
  MS_EXCEPTION_IF_NULL(kernel);
  std::vector<std::shared_ptr<TensorData>> result;
  auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
  auto kernel_name = kernel->fullname_with_scope();
  for (size_t j = 0; j < output_size; ++j) {
    auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
    auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);
    if (tensor) result.push_back(tensor);
  }
  return result;
}

bool DebugServices::TensorExistsInCurrent(std::string tensor_name) {
  return tensor_loader_->TensorExistsInCurrent(tensor_name);
}

void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) {
  tensor_loader_->MoveTensorCurrentToPrev(tensor_name);
}
}  // namespace mindspore
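
Review note: ResetLoadedTensors is a three-step rotation whose net effect is that activations are dropped while parameters survive as the next step's "previous" values. A toy sketch with `std::map<std::string, int>` standing in for the tensor maps; the `:prev` keying convention follows the TensorLoader changes later in this diff:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  // "fc1.weight:0" is a parameter (it has a ":prev" twin); "relu:0" is an activation.
  std::map<std::string, int> current = {{"fc1.weight:0", 3}, {"fc1.weight:0:prev", 2}, {"relu:0", 7}};
  std::map<std::string, int> prev;

  // MoveParametersCurrentToPrev: a key is a parameter iff key + ":prev" exists.
  for (auto it = current.begin(); it != current.end();) {
    if (current.count(it->first + ":prev")) {
      prev.insert(*it);
      it = current.erase(it);
    } else {
      ++it;
    }
  }
  current.clear();     // EmptyCurrentTensor: drop activations and stale ":prev" copies
  current.swap(prev);  // SwapCurrentPrev: parameters become the new "previous" values

  std::cout << current.size() << '\n';  // 1 -- only fc1.weight:0 carried over
}
```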
@@ -22,6 +22,7 @@
#include <memory>
#include <tuple>
#include <unordered_map>
#include <set>
#include <mutex>
#include <map>
#include <limits>

@@ -160,6 +161,10 @@ class DebugServices {
    bool range_enabled() const {
      return condition.type == RANGE && (!parameter_list[0].disabled || !parameter_list[1].disabled);
    }
    bool change_condition() const {
      return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL || condition.type == NOT_CHANGED;
    }
  } watchpoint_t;

  void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,

@@ -171,7 +176,8 @@ class DebugServices {
  void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
                        std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters,
                        std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows,
                        const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend);
                        const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend,
                        const bool step_end, const bool recheck);

  void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
                        std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,

@@ -181,8 +187,6 @@ class DebugServices {
  bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;
  void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel);
  void EmptyTensor();
  std::vector<std::shared_ptr<TensorData>> GetTensor() const;

@@ -205,9 +209,19 @@ class DebugServices {
  std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();
  void ResetLoadedTensors();
  std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel);
  bool TensorExistsInCurrent(std::string tensor_name);
  void MoveTensorCurrentToPrev(std::string tensor_name);

 private:
  std::mutex lock_;
  // tracks which watchpoints have already been checked for each tensor in the current step
  std::unordered_map<std::string, std::set<int32_t>> wp_id_cache;
  std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
  TensorLoader *tensor_loader_;

@@ -313,20 +313,16 @@ void Debugger::PostExecute() {
  }
  if (debugger_->DebuggerBackendEnabled()) {
    // analyze tensor data and send any watchpoints that were hit
    if (run_level_ == "node") {
      MS_LOG(INFO) << "Debugger is in node level mode ";
      return;
    }
    if (debugger_enabled_ && !is_dataset_graph_) {
      if (device_target_ != kGPUDevice) {
        num_step_++;
        MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
        SendWatchpoints(CheckWatchpoints());
        CommandLoop();
      } else {
        CommandLoop();
      }
      MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
      SendWatchpoints(CheckWatchpoints());
      CommandLoop();
    }
    // Only keep parameters in the current map
    debug_services_->ResetLoadedTensors();
  }
}
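
Review note: with the GPU-only branch gone, every device target now follows the same end-of-step sequence. A hedged sketch of the converged flow using hypothetical stand-in types (the real members live on Debugger and DebugServices):

```cpp
// Stand-ins used only to show the end-of-step ordering.
struct DebugServicesStub {
  void ResetLoadedTensors() { /* move params current->prev, clear current, swap */ }
};

struct DebuggerStub {
  int num_step_ = 0;
  bool debugger_enabled_ = true, is_dataset_graph_ = false;
  DebugServicesStub debug_services_;

  void SendWatchpointsForStep() { /* SendWatchpoints(CheckWatchpoints()); */ }
  void CommandLoop() { /* block until the frontend resumes execution */ }

  void PostExecute() {
    if (debugger_enabled_ && !is_dataset_graph_) {
      num_step_++;                 // same counter on GPU and Ascend after this change
      SendWatchpointsForStep();    // step-end watchpoint analysis for every target
      CommandLoop();               // suspend until a run/continue command arrives
    }
    debug_services_.ResetLoadedTensors();  // only parameters survive into the next step
  }
};
```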
@@ -596,7 +592,7 @@ void Debugger::CommandLoop() {
      MS_LOG(INFO) << "RunCMD";
      if (GetRunLevel(reply) == "recheck") {
        MS_LOG(INFO) << "rechecking all watchpoints";
        SendWatchpoints(CheckWatchpoints());
        SendWatchpoints(CheckWatchpoints("", nullptr, true));
      } else {
        // no longer the initial suspension.
        initial_suspend_ = false;

@@ -705,9 +701,6 @@ void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCon
    return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()};
  });
  debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list);
  if (initial_suspend_ &&
      static_cast<DebugServices::CONDITION_TYPE>(condition.condition()) == DebugServices::CONDITION_TYPE::INIT)
    SendWatchpoints(CheckWatchpoints());
}

void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); }

@@ -780,7 +773,8 @@ void Debugger::Exit() {
  }
}

std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel) {
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel,
                                                    bool recheck) {
  std::vector<std::string> name;
  std::vector<std::string> slot;
  std::vector<int> condition;

@@ -795,11 +789,10 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
  if (watchnode.empty()) {
    tensor_list = debug_services_->GetTensor();
  } else {
    tensor_list = debug_services_->GetNodeTensorMap(watchnode);
    debug_services_->AddWeightsBiasInputs(&tensor_list, kernel);
    tensor_list = debug_services_->GetNodeTensor(kernel);
  }
  debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, &parameters, &error_codes, overflow_ops,
                                    tensor_list, initial_suspend_);
                                    tensor_list, initial_suspend_, watchnode.empty(), recheck);
  std::list<WatchpointHit> hits;
  for (unsigned int i = 0; i < name.size(); i++) {
    WatchpointHit hit;

@@ -1045,7 +1038,7 @@ std::vector<std::string> Debugger::CheckOpOverflow() {
  }
  closedir(d);
  if (op_names.size()) {
  if (!op_names.empty()) {
    MS_LOG(ERROR) << "These operation overflows are detected " << op_names;
  }

@@ -1091,12 +1084,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
  if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
    return;
  }
  bool keep_prev;
  if (anf_node->isa<Parameter>()) {
    keep_prev = true;
  } else {
    keep_prev = false;
  }
  // for parameters and value nodes, set its execution order to be 0;
  int exec_order = 0;
  std::string node_name = anf_node->fullname_with_scope();

@@ -1114,6 +1101,13 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
  auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
  (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                       [](size_t inner_item) { return SizeToInt(inner_item); });
  bool keep_prev;
  if (anf_node->isa<Parameter>()) {
    keep_prev = true;
    debug_services_->MoveTensorCurrentToPrev(tensor_name);
  } else {
    keep_prev = false;
  }
  bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev);
  if (!ret) {
    MS_LOG(ERROR) << "LoadMemToHost:"

@@ -1123,9 +1117,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
void Debugger::LoadParametersAndConst() {
  if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
  if (!(num_step_ == 0 || device_target_ == kAscendDevice ||
        (device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration())))
    return;
  MS_EXCEPTION_IF_NULL(graph_ptr_);
  // load parameters
  MS_LOG(INFO) << "Start to load Parameters!";

@@ -1199,5 +1190,8 @@ void Debugger::ClearCurrentData() {
  if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
    debug_services_->EmptyCurrentTensor();
}

bool Debugger::TensorExistsInCurrent(std::string tensor_name) {
  return debug_services_->TensorExistsInCurrent(tensor_name);
}
}  // namespace mindspore

@@ -145,6 +145,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  std::list<KernelGraphPtr> GetGraphPtrList() { return graph_ptr_list_; }

  bool TensorExistsInCurrent(std::string tensor_name);

 private:
  // private constructor for singleton
  Debugger();

@@ -197,7 +199,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  // analyze tensors and check watchpoint conditions
  // return names of tensors and what condition they hit
  std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string(),
                                            const CNodePtr &kernel = NULL);
                                            const CNodePtr &kernel = nullptr, bool recheck = false);

  // send watchpoints that hit
  void SendWatchpoints(const std::list<WatchpointHit> &points);

@@ -33,6 +33,44 @@ class TensorLoader {
  ~TensorLoader() { EmptyTensor(); }

  void MoveTensorCurrentToPrev(std::string tensor_name) {
    auto handle = tensor_list_map.extract(tensor_name);
    if (!handle.empty()) {
      MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
      prev_tensor_list_map.insert(std::move(handle));
    }
  }

  void SwapCurrentPrev() { tensor_list_map.swap(prev_tensor_list_map); }

  bool TensorExistsInCurrent(std::string tensor_name) {
    return tensor_list_map.find(tensor_name) != tensor_list_map.end();
  }

  // only parameters will return true
  bool PrevTensorExistsInCurrent(std::string tensor_name) { return TensorExistsInCurrent(tensor_name + ":prev"); }

  void MoveParametersCurrentToPrev() {
    MS_LOG(INFO) << "Moving parameters from current map to previous map";
    auto iter = tensor_list_map.begin();
    while (iter != tensor_list_map.end()) {
      auto key = iter->first;
      if (PrevTensorExistsInCurrent(key)) {
        // a :prev tensor exists only for parameters; move this one to the prev map
        ++iter;
        MoveTensorCurrentToPrev(key);
      } else {
        ++iter;
      }
    }
  }

  bool IsPrevTensor(std::string tensor_name) {
    const std::string suffix = ":prev";
    if (tensor_name.length() <= suffix.length()) return false;
    return std::equal(suffix.rbegin(), suffix.rend(), tensor_name.rbegin());
  }

  bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) {
    std::lock_guard<std::mutex> lg(lock_);
    if (keep_prev) {
@@ -43,20 +81,32 @@ class TensorLoader {
        tensor_list_map.insert(std::move(handle));
      }
    }
    tensor_list.push_back(tensor);
    tensor_list_map[tensor->GetName()] = tensor;  // use [] instead of insert to ensure latest value
    auto node_name = tensor->GetName();
    node_name = node_name.substr(0, node_name.find_first_of(":"));
    node_tensor_map.insert({node_name, tensor});
    return true;
  }

  std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; }
  std::vector<std::shared_ptr<TensorData>> GetTensor() {
    std::vector<std::shared_ptr<TensorData>> tensor_list;
    for (auto &it : tensor_list_map) {
      if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second);
    }
    return tensor_list;
  }

  std::shared_ptr<TensorData> GetTensor(const std::string &tensor_name) {
    auto iter = tensor_list_map.find(tensor_name);
    if (iter != tensor_list_map.end()) return iter->second;
    return nullptr;
  }

  uint32_t GetIterNum() { return iter_num; }

  std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; }

  std::shared_ptr<TensorData> GetPrevTensor(std::string tensor_name) {
  std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
    if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) {
      return tensor_list_map[tensor_name + ":prev"];
    }
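
Review note: the inline comment on LoadNewTensor is the crux of this hunk. `std::map::insert` is a no-op when the key already exists, so a reloaded tensor would keep its stale entry, while `operator[]` always stores the latest value. A runnable illustration:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> m;
  m.insert({"conv1:0", "step1"});
  m.insert({"conv1:0", "step2"});     // ignored: the key is already present
  std::cout << m["conv1:0"] << '\n';  // step1
  m["conv1:0"] = "step2";             // operator[] overwrites with the latest value
  std::cout << m["conv1:0"] << '\n';  // step2
}
```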
@@ -91,14 +141,13 @@ class TensorLoader {
    prev_tensor_list_map.clear();
    node_tensor_map.clear();
    tensor_list_map.swap(prev_tensor_list_map);
    tensor_list.clear();
  }

  void EmptyPrevTensor() { prev_tensor_list_map.clear(); }

  void EmptyCurrentTensor() {
    tensor_list_map.clear();
    tensor_list.clear();
    node_tensor_map.clear();
  }

  void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }

@@ -142,7 +191,6 @@ class TensorLoader {
  }

 private:
  std::vector<std::shared_ptr<TensorData>> tensor_list;
  std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map;
  std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map;
  std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map;
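
Review note: two standard-library details carry the TensorLoader changes above: `std::map::extract` (C++17) moves a map node between the current and previous maps without copying the mapped shared_ptr, and the reverse-iterator `std::equal` in IsPrevTensor is a constant-time suffix test. A self-contained demo with a toy mapped type in place of TensorData:

```cpp
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Same suffix test as IsPrevTensor above.
bool IsPrevTensor(const std::string &name) {
  const std::string suffix = ":prev";
  if (name.length() <= suffix.length()) return false;
  return std::equal(suffix.rbegin(), suffix.rend(), name.rbegin());
}

int main() {
  std::map<std::string, std::shared_ptr<int>> current, prev;
  current["fc1.weight:0"] = std::make_shared<int>(42);

  auto handle = current.extract("fc1.weight:0");  // unlinks the node; no copy of the shared_ptr
  if (!handle.empty()) prev.insert(std::move(handle));

  std::cout << std::boolalpha << IsPrevTensor("fc1.weight:0:prev") << '\n';                 // true
  std::cout << current.count("fc1.weight:0") << ' ' << prev.count("fc1.weight:0") << '\n';  // 0 1
}
```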
@@ -674,6 +674,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
                                        const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
                                        size_t slot, bool keep_prev) const {
  bool ret = false;
  if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
    MS_LOG(INFO) << tensor_name << " has already been loaded for this step, so it is not loaded again.";
    return true;
  }
  // TensorData is freed up in AscendSession class
  auto tensor_data = std::make_shared<mindspore::TensorData>();
  tensor_data->SetName(tensor_name);
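
Review note: both device addresses now gate LoadMemToHost on TensorExistsInCurrent, so repeated loads of the same tensor within one step become no-ops. A minimal sketch of the guard, with hypothetical stand-ins (`HostTensorCache`, `CopyDeviceToHost`) for the real device-address machinery:

```cpp
#include <set>
#include <string>

// Hypothetical cache standing in for the debugger's current tensor map.
struct HostTensorCache {
  std::set<std::string> loaded_this_step;

  bool CopyDeviceToHost(const std::string & /*tensor_name*/) {
    return true;  // stand-in for the real device-to-host memcpy
  }

  bool LoadMemToHost(const std::string &tensor_name) {
    // already loaded for this step: report success without copying again
    if (loaded_this_step.count(tensor_name)) return true;
    if (!CopyDeviceToHost(tensor_name)) return false;
    loaded_this_step.insert(tensor_name);
    return true;
  }

  void ResetForNextStep() { loaded_this_step.clear(); }  // done by ResetLoadedTensors in the patch
};
```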
@@ -296,8 +296,6 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
#ifdef ENABLE_DEBUGGER
  MS_LOG(INFO) << "Start load step";
  uint32_t cur_iter = 0;
  MS_LOG(INFO) << "Cur iter is " << cur_iter;
  for (auto graph_ptr : debugger_->GetGraphPtrList()) {
    debugger_->SetGraphPtr(graph_ptr);
    // load output

@@ -87,6 +87,11 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
    return true;
  }

  if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
    MS_LOG(INFO) << tensor_name << " has already been loaded for this step, so it is not loaded again.";
    return true;
  }

  mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
  size_t host_size = out_tensor->data().nbytes();
  auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());

@@ -154,8 +154,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
  std::vector<int> real_outputs;
  real_outputs = CheckRealOutput(node_name, output_size);
  for (std::vector<int>::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) {
    auto j = *it;
  for (int j : real_outputs) {
    auto addr = kernel_outputs[j];
    auto type = AnfAlgo::GetOutputInferDataType(kernel, j);
    auto format = kOpFormat_DEFAULT;