| @@ -81,9 +81,8 @@ bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr | |||
| #ifdef ENABLE_DEBUGGER | |||
| auto debugger_ = mindspore::Debugger::GetInstance(); | |||
| if (debugger_->DebuggerBackendEnabled()) { | |||
| DebugServices *debug_services = debugger_->debug_services(); | |||
| std::string current_kernel_name = kernel_curr->scope_full_name(); | |||
| if (debug_services->IsWatchPoint(current_kernel_name)) { | |||
| if (debugger_->DebugServicesIsWatchPoint(current_kernel_name)) { | |||
| return false; | |||
| } | |||
| } | |||
| @@ -993,16 +993,12 @@ void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) | |||
| if (debugger_->DebuggerBackendEnabled()) { | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| DebugServices *debug_services = debugger_->debug_services(); | |||
| MS_EXCEPTION_IF_NULL(debug_services); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| MS_EXCEPTION_IF_NULL(tensor_loader); | |||
| // TensorData will be freed up here | |||
| tensor_loader->EmptyTensor(); | |||
| uint32_t iter_num = tensor_loader->GetIterNum(); | |||
| tensor_loader->set_iter_num(++iter_num); | |||
| debugger_->EmptyTensor(); | |||
| uint32_t iter_num = debugger_->GetTensorLoaderIterNum(); | |||
| debugger_->SetTensorLoaderIterNum(++iter_num); | |||
| (void)runtime_instance->LoadData(kernel_graph.get()); | |||
| tensor_loader->EmptyPrevTensor(); | |||
| debugger_->EmptyPrevTensor(); | |||
| } | |||
| #endif | |||
| MS_LOG(INFO) << "Finish!"; | |||
| @@ -462,11 +462,9 @@ void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| DebugServices *debug_services = debugger_->debug_services(); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| tensor_loader->EmptyTensor(); | |||
| uint32_t iter_num = tensor_loader->GetIterNum(); | |||
| tensor_loader->set_iter_num(++iter_num); | |||
| debugger_->EmptyTensor(); | |||
| uint32_t iter_num = debugger_->GetTensorLoaderIterNum(); | |||
| debugger_->SetTensorLoaderIterNum(++iter_num); | |||
| } | |||
| void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| @@ -477,9 +475,7 @@ void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| DebugServices *debug_services = debugger_->debug_services(); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| tensor_loader->EmptyPrevTensor(); | |||
| debugger_->EmptyPrevTensor(); | |||
| } | |||
| void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| @@ -67,16 +67,13 @@ void E2eDumpUtil::DumpMemToFile(const std::string &file_path, NotNull<const devi | |||
| void E2eDumpUtil::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name, | |||
| NotNull<const device::DeviceAddress *> addr, bool trans_flag, | |||
| const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger) { | |||
| const ShapeVector &int_shapes, const TypeId &type, size_t slot, | |||
| const Debugger *debugger) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| auto format = kOpFormat_DEFAULT; | |||
| MS_EXCEPTION_IF_NULL(debugger); | |||
| DebugServices *debug_services = debugger->debug_services(); | |||
| MS_EXCEPTION_IF_NULL(debug_services); | |||
| TensorLoader *tensor_loader = debug_services->tensor_loader(); | |||
| MS_EXCEPTION_IF_NULL(tensor_loader); | |||
| auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, type, | |||
| addr->type_id(), addr->format(), slot); | |||
| auto ret = debugger->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, type, | |||
| addr->type_id(), addr->format(), slot); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "DumpTensorToFile Failed: flag:" << std::to_string(trans_flag) << ", path:" << file_path | |||
| << ", host_format:" << format; | |||
| @@ -50,7 +50,8 @@ class E2eDumpUtil { | |||
| const ShapeVector &int_shapes, const TypeId &type); | |||
| static void DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name, | |||
| NotNull<const device::DeviceAddress *> addr, bool trans_flag, | |||
| const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger); | |||
| const ShapeVector &int_shapes, const TypeId &type, size_t slot, | |||
| const Debugger *debugger); | |||
| static void GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag, NotNull<ShapeVector *> int_shapes); | |||
| static bool IsDeviceTargetGPU(); | |||
| static void DumpSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path, | |||
| @@ -205,7 +205,7 @@ void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector< | |||
| } | |||
| } | |||
| bool DebugServices::IsWatchPoint(std::string kernel_name, const CNodePtr &kernel) { | |||
| bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const { | |||
| bool ret = false; | |||
| for (auto w_table_item : watchpoint_table) { | |||
| auto check_node_list = std::get<1>(w_table_item).check_node_list; | |||
| @@ -223,7 +223,7 @@ bool DebugServices::IsWatchPoint(std::string kernel_name, const CNodePtr &kernel | |||
| return ret; | |||
| } | |||
| bool DebugServices::IsWatchPointNodeInput(std::string w_name, const CNodePtr &kernel) { | |||
| bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const { | |||
| if (kernel) { | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| for (size_t j = 0; j < input_size; ++j) { | |||
| @@ -260,7 +260,34 @@ void DebugServices::AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData> | |||
| } | |||
| } | |||
| TensorLoader *DebugServices::tensor_loader() const { return tensor_loader_; } | |||
| void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); } | |||
| std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); } | |||
| std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensorMap(const std::string &node_name) const { | |||
| return tensor_loader_->GetNodeTensorMap(node_name); | |||
| } | |||
| uint32_t DebugServices::GetTensorLoaderIterNum() const { return tensor_loader_->GetIterNum(); } | |||
| void DebugServices::SetTensorLoaderIterNum(uint32_t iter_num) { tensor_loader_->set_iter_num(iter_num); } | |||
| void DebugServices::EmptyPrevTensor() { tensor_loader_->EmptyPrevTensor(); } | |||
| void DebugServices::EmptyCurrentTensor() { tensor_loader_->EmptyCurrentTensor(); } | |||
| bool DebugServices::DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, | |||
| TypeId host_type, TypeId addr_type_id, const std::string &addr_format, | |||
| size_t slot) const { | |||
| return tensor_loader_->DumpTensorToFile(tensor_name, trans_flag, filepath, host_fmt, host_shape, host_type, | |||
| addr_type_id, addr_format, slot); | |||
| } | |||
| bool DebugServices::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) { | |||
| return tensor_loader_->LoadNewTensor(tensor, keep_prev); | |||
| } | |||
| std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::GetWatchpointTable() { | |||
| return watchpoint_table; | |||
| } | |||
| @@ -177,13 +177,31 @@ class DebugServices { | |||
| std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | |||
| std::vector<TypePtr> *dtype, std::vector<std::vector<int64_t>> *shape); | |||
| bool IsWatchPoint(std::string kernel_name, const CNodePtr &kernel = nullptr); | |||
| bool IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const; | |||
| bool IsWatchPointNodeInput(std::string w_name, const CNodePtr &kernel); | |||
| bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const; | |||
| void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel); | |||
| TensorLoader *tensor_loader() const; | |||
| void EmptyTensor(); | |||
| std::vector<std::shared_ptr<TensorData>> GetTensor() const; | |||
| std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(const std::string &node_name) const; | |||
| uint32_t GetTensorLoaderIterNum() const; | |||
| void SetTensorLoaderIterNum(uint32_t iter_num); | |||
| void EmptyPrevTensor(); | |||
| void EmptyCurrentTensor(); | |||
| bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) const; | |||
| bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev); | |||
| std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable(); | |||
| @@ -794,12 +794,11 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode | |||
| #ifdef ENABLE_D | |||
| overflow_ops = CheckOpOverflow(); | |||
| #endif | |||
| auto tensor_loader = debug_services_->tensor_loader(); | |||
| std::vector<std::shared_ptr<TensorData>> tensor_list; | |||
| if (watchnode.empty()) { | |||
| tensor_list = tensor_loader->GetTensor(); | |||
| tensor_list = debug_services_->GetTensor(); | |||
| } else { | |||
| tensor_list = tensor_loader->GetNodeTensorMap(watchnode); | |||
| tensor_list = debug_services_->GetNodeTensorMap(watchnode); | |||
| debug_services_->AddWeightsBiasInputs(&tensor_list, kernel); | |||
| } | |||
| debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops, | |||
| @@ -841,7 +840,28 @@ void Debugger::SendWatchpoints(const std::list<WatchpointHit> &points) { | |||
| } | |||
| } | |||
| DebugServices *Debugger::debug_services() const { return debug_services_.get(); } | |||
| bool Debugger::DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) const { | |||
| return debug_services_.get()->DumpTensorToFile(tensor_name, trans_flag, filepath, host_fmt, host_shape, host_type, | |||
| addr_type_id, addr_format, slot); | |||
| } | |||
| bool Debugger::DebugServicesIsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const { | |||
| return debug_services_.get()->IsWatchPoint(kernel_name, kernel); | |||
| } | |||
| void Debugger::EmptyTensor() { debug_services_.get()->EmptyTensor(); } | |||
| void Debugger::SetTensorLoaderIterNum(uint32_t iter_num) { debug_services_.get()->SetTensorLoaderIterNum(iter_num); } | |||
| void Debugger::EmptyPrevTensor() { debug_services_.get()->EmptyPrevTensor(); } | |||
| uint32_t Debugger::GetTensorLoaderIterNum() const { return debug_services_.get()->GetTensorLoaderIterNum(); } | |||
| bool Debugger::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) { | |||
| return debug_services_.get()->LoadNewTensor(tensor, keep_prev); | |||
| } | |||
| bool Debugger::debugger_enabled() const { return debugger_enabled_; } | |||
| @@ -1169,7 +1189,7 @@ void Debugger::UpdateStepNum(const session::KernelGraph *graph) { | |||
| void Debugger::ClearCurrentData() { | |||
| if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration())) | |||
| debug_services_->tensor_loader()->EmptyCurrentTensor(); | |||
| debug_services_->EmptyCurrentTensor(); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -88,7 +88,21 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||
| // suspend the execution after a debug_op | |||
| void PostDebugOp(); | |||
| DebugServices *debug_services() const; | |||
| bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) const; | |||
| bool DebugServicesIsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const; | |||
| void EmptyTensor(); | |||
| void SetTensorLoaderIterNum(uint32_t iter_num); | |||
| void EmptyPrevTensor(); | |||
| uint32_t GetTensorLoaderIterNum() const; | |||
| bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev); | |||
| bool debugger_enabled() const; | |||
| @@ -103,9 +103,9 @@ class TensorLoader { | |||
| void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } | |||
| bool DumpTensorToFile(std::string tensor_name, bool trans_flag, const std::string &filepath, | |||
| bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, std::string addr_format, size_t slot) const { | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) const { | |||
| if (filepath.empty()) { | |||
| MS_LOG(ERROR) << "Dump file path is null!"; | |||
| return false; | |||
| @@ -674,8 +674,6 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||
| const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type, | |||
| size_t slot, bool keep_prev) const { | |||
| bool ret = false; | |||
| TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader(); | |||
| MS_EXCEPTION_IF_NULL(tensor_loader); | |||
| // TensorData is freed up in AscendSession class | |||
| auto tensor_data = std::make_shared<mindspore::TensorData>(); | |||
| tensor_data->SetName(tensor_name); | |||
| @@ -689,7 +687,7 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||
| } | |||
| MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | |||
| tensor_data->SetTensor(out_tensor); | |||
| ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); | |||
| ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev); | |||
| return ret; | |||
| } | |||
| #endif | |||
| @@ -87,8 +87,6 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi | |||
| return true; | |||
| } | |||
| TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader(); | |||
| mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape); | |||
| size_t host_size = out_tensor->data().nbytes(); | |||
| auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c()); | |||
| @@ -101,7 +99,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi | |||
| tensor_data->SetExecutionOrder(execution_order); | |||
| tensor_data->SetTensor(out_tensor); | |||
| tensor_data->SetSlot(slot); | |||
| ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); | |||
| ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev); | |||
| MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | |||
| return ret; | |||
| } | |||