fix bugs for grpc implementation addressed peer review comments delete device_target code from Adel add CheckSingleWatchpoint function for node level debugger set the device target when sending metadata add current node name fix bugs for current node name fix run_level_ bug fix bugs for CheckSingleWatchpoint fix multi-outputs node issue fix num_step_ bug fix continue_to previous node issue fix run_level issue fix merge conflict smart kernel read, watch hit stop mid-step, fix step number, read input tensors cleanup the code and isolate UpdateStepNum function do cpplint, Cppcheck and clang-format check recover CMakeList.txt mindspore grpc implementation fix bugs for grpc implementation addressed peer review comments delete device_target code from Adel add CheckSingleWatchpoint function for node level debugger set the device target when sending metadata add current node name fix bugs for current node name fix run_level_ bug fix bugs for CheckSingleWatchpoint fix multi-outputs node issue fix num_step_ bug fix continue_to previous node issue fix run_level issue fix merge conflict smart kernel read, watch hit stop mid-step, fix step number, read input tensors cleanup the code and isolate UpdateStepNum function do cpplint, Cppcheck and clang-format check recover CMakeList.txt only update step_num in one place fix clang-format error fix CI errors part2 update graphengine version addressed comments tags/v0.7.0-beta
| @@ -171,6 +171,61 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector | |||||
| } | } | ||||
| } | } | ||||
| void DebugServices::CheckSingleWatchpoint(std::shared_ptr<TensorData> watchtensor, std::string *name, std::string *slot, | |||||
| char **data_ptr, unsigned int *data_size, int *condition, | |||||
| unsigned int *wacthpoint_id) { | |||||
| std::lock_guard<std::mutex> lg(lock_); | |||||
| std::string current_watchtensor_name; | |||||
| current_watchtensor_name = watchtensor->GetName(); | |||||
| mindspore::tensor::TensorPtr tensor_ptr = watchtensor->GetTensor(); | |||||
| int tensor_data_type = tensor_ptr->data_type_c(); | |||||
| watchpoint_t watchpoint_to_check; | |||||
| for (auto w_table_item : watchpoint_table) { | |||||
| auto check_node_list = std::get<1>(w_table_item).check_node_list; | |||||
| for (auto check_node : check_node_list) { | |||||
| std::string w_name = std::get<0>(check_node); | |||||
| bool w_type = std::get<1>(check_node); | |||||
| // get current the full info including condition, id..., for current watchtensor | |||||
| std::string current_node_name = current_watchtensor_name.substr(0, current_watchtensor_name.find_first_of(":")); | |||||
| if ((w_type == true && (current_watchtensor_name.find(w_name) != string::npos || w_name == "*")) || | |||||
| (w_type == false && current_node_name == w_name)) { | |||||
| watchpoint_to_check = w_table_item.second; | |||||
| // need to add support for float16 and float64, and other types when we support conditions beyond inf and nan | |||||
| if (tensor_data_type != kNumberTypeFloat && tensor_data_type != kNumberTypeFloat32) { | |||||
| return; | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c()); | |||||
| unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float); | |||||
| for (unsigned int index = 0; index < num_elements; index++) { | |||||
| float x = start_addr[index]; | |||||
| if (((watchpoint_to_check.conditions.inf.enabled || watchpoint_to_check.conditions.neg_inf.enabled) && isinf(x)) || | |||||
| (watchpoint_to_check.conditions.nan.enabled && isnan(x))) { | |||||
| std::string name_no_slot = current_watchtensor_name.substr(0, current_watchtensor_name.find_first_of(":")); | |||||
| *name = name_no_slot; | |||||
| *slot = std::to_string(watchtensor->GetSlot()); | |||||
| *data_ptr = reinterpret_cast<char *>(tensor_ptr->data_c()); | |||||
| *data_size = tensor_ptr->data().nbytes(); | |||||
| int condition_item = -1; | |||||
| if (watchpoint_to_check.conditions.nan.enabled) { | |||||
| condition_item = 0; | |||||
| } else if (watchpoint_to_check.conditions.inf.enabled || watchpoint_to_check.conditions.neg_inf.enabled) { | |||||
| condition_item = 1; | |||||
| } | |||||
| *condition = condition_item; | |||||
| *wacthpoint_id = watchpoint_to_check.id; | |||||
| } | |||||
| } | |||||
| } | |||||
| void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | ||||
| std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | ||||
| std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape) { | std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape) { | ||||
| @@ -78,6 +78,9 @@ class DebugServices { | |||||
| std::vector<unsigned int> *data_size, std::vector<int> *condition, | std::vector<unsigned int> *data_size, std::vector<int> *condition, | ||||
| std::vector<unsigned int> *wacthpoint_id); | std::vector<unsigned int> *wacthpoint_id); | ||||
| void CheckSingleWatchpoint(std::shared_ptr<TensorData> watchnode, std::string *name, std::string *slot, | |||||
| char **data_ptr, unsigned int *data_size, int *condition, unsigned int *wacthpoint_id); | |||||
| void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | ||||
| std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, | ||||
| std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape); | std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape); | ||||
| @@ -31,6 +31,10 @@ service EventListener { | |||||
| message Metadata { | message Metadata { | ||||
| string device_name = 1; | string device_name = 1; | ||||
| int32 cur_step = 2; | int32 cur_step = 2; | ||||
| // the backend in use: "GPU" or "Ascend" | |||||
| string backend = 3; | |||||
| // the full name of the current node | |||||
| string cur_node = 4; | |||||
| } | } | ||||
| message EventReply { | message EventReply { | ||||
| @@ -44,12 +48,22 @@ message EventReply { | |||||
| oneof cmd { | oneof cmd { | ||||
| bool exit = 2; | bool exit = 2; | ||||
| int32 run_cmd = 3; | |||||
| RunCMD run_cmd = 3; | |||||
| SetCMD set_cmd = 4; | SetCMD set_cmd = 4; | ||||
| ViewCMD view_cmd = 5; | ViewCMD view_cmd = 5; | ||||
| } | } | ||||
| } | } | ||||
| message RunCMD { | |||||
| // granularity of the run command: "step" (step level) or "node" (node level) | |||||
| string run_level = 1; | |||||
| oneof cmd { | |||||
| int32 run_steps = 2; | |||||
| // the full name of the next node | |||||
| string node_name = 3; | |||||
| } | |||||
| } | |||||
| message SetCMD { | message SetCMD { | ||||
| repeated WatchNode watch_nodes = 1; | repeated WatchNode watch_nodes = 1; | ||||
| WatchCondition watch_condition = 2; | WatchCondition watch_condition = 2; | ||||
| @@ -45,6 +45,9 @@ Debugger::Debugger() | |||||
| device_target_(""), | device_target_(""), | ||||
| num_step_(0), | num_step_(0), | ||||
| debugger_enabled_(false), | debugger_enabled_(false), | ||||
| run_level_(""), | |||||
| node_name_(""), | |||||
| cur_name_(""), | |||||
| is_dataset_graph_(false), | is_dataset_graph_(false), | ||||
| partial_memory_(false) {} | partial_memory_(false) {} | ||||
| @@ -164,10 +167,46 @@ void Debugger::PostExecute() { | |||||
| // access lock for public method | // access lock for public method | ||||
| std::lock_guard<std::mutex> a_lock(access_lock_); | std::lock_guard<std::mutex> a_lock(access_lock_); | ||||
| // analyze tensor data and send the watchpoints been hit | // analyze tensor data and send the watchpoints been hit | ||||
| if (run_level_ == "node") { | |||||
| MS_LOG(INFO) << "Debugger is in node level mode "; | |||||
| return; | |||||
| } | |||||
| if (debugger_enabled_ && !is_dataset_graph_) { | if (debugger_enabled_ && !is_dataset_graph_) { | ||||
| num_step_++; | |||||
| MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_; | MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_; | ||||
| SendWatchpointsAndSuspend(CheckWatchpoints()); | |||||
| CommandLoop(); | |||||
| } | |||||
| } | |||||
| bool Debugger::ReadNodeDataRequired() { | |||||
| if (debugger_enabled_ && !is_dataset_graph_) { | |||||
| auto watchpoint_table = debug_services_->GetWatchpointTable(); | |||||
| auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table); | |||||
| // if node has a watchpoint on it, is next_to node, or continue_to node then read the kernel tensor data | |||||
| if (is_watchpoint || (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_))) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| void Debugger::PostExecuteNode() { | |||||
| // access lock for public method | |||||
| std::lock_guard<std::mutex> a_lock(access_lock_); | |||||
| if (debugger_enabled_ && !is_dataset_graph_) { | |||||
| auto watchpoint_table = debug_services_->GetWatchpointTable(); | |||||
| auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table); | |||||
| // if kernel is watchpoint,and get hit. suspend. | |||||
| if (is_watchpoint) { | |||||
| auto hits = CheckSingleWatchpoint(cur_name_); | |||||
| if (!hits.empty()) { | |||||
| SendWatchpointsAndSuspend(hits); | |||||
| } | |||||
| } | |||||
| // if kernel is not watchpoint and is next_to or continue_to node, suspend. | |||||
| if (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) { | |||||
| CommandLoop(); | |||||
| } | |||||
| return; | |||||
| } | } | ||||
| } | } | ||||
| @@ -232,6 +271,8 @@ void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) { | |||||
| Metadata metadata; | Metadata metadata; | ||||
| metadata.set_device_name(device_name); | metadata.set_device_name(device_name); | ||||
| metadata.set_cur_step(num_step_); | metadata.set_cur_step(num_step_); | ||||
| metadata.set_backend(device_target_); | |||||
| metadata.set_cur_node(cur_name_); | |||||
| EventReply reply_metadata = grpc_client_->SendMetadata(metadata); | EventReply reply_metadata = grpc_client_->SendMetadata(metadata); | ||||
| if (reply_metadata.status() != reply_metadata.OK) { | if (reply_metadata.status() != reply_metadata.OK) { | ||||
| MS_LOG(ERROR) << "Error: SendMetadata failed"; | MS_LOG(ERROR) << "Error: SendMetadata failed"; | ||||
| @@ -249,8 +290,11 @@ void Debugger::CommandLoop() { | |||||
| // prepare metadata | // prepare metadata | ||||
| std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id()); | std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id()); | ||||
| Metadata metadata; | Metadata metadata; | ||||
| metadata.set_device_name(device_name); | metadata.set_device_name(device_name); | ||||
| metadata.set_cur_step(num_step_); | metadata.set_cur_step(num_step_); | ||||
| metadata.set_backend(device_target_); | |||||
| metadata.set_cur_node(cur_name_); | |||||
| // loop exit flag | // loop exit flag | ||||
| bool run = false; | bool run = false; | ||||
| @@ -291,6 +335,16 @@ void Debugger::CommandLoop() { | |||||
| break; | break; | ||||
| case DebuggerCommand::kRunCMD: | case DebuggerCommand::kRunCMD: | ||||
| MS_LOG(INFO) << "RunCMD"; | MS_LOG(INFO) << "RunCMD"; | ||||
| { | |||||
| // print run cmd content | |||||
| // get run_level and node_name | |||||
| run_level_ = GetRunLevel(reply); | |||||
| node_name_ = GetNodeName(reply); | |||||
| MS_LOG(INFO) << "run_level: " << run_level_; | |||||
| MS_LOG(INFO) << "node_name_: " << node_name_; | |||||
| } | |||||
| // exit loop | // exit loop | ||||
| run = true; | run = true; | ||||
| break; | break; | ||||
| @@ -445,6 +499,35 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints() const { | |||||
| return hits; | return hits; | ||||
| } | } | ||||
| std::list<WatchpointHit> Debugger::CheckSingleWatchpoint(std::string watchnode) const { | |||||
| auto tensor_loader = debug_services_->tensor_loader(); | |||||
| auto tensors = tensor_loader->GetNodeTensorMap(watchnode); | |||||
| std::list<WatchpointHit> hits; | |||||
| for (std::vector<std::shared_ptr<TensorData>>::iterator it = tensors.begin(); it != tensors.end(); ++it) { | |||||
| auto cur_tensor = *it; | |||||
| std::string name = ""; | |||||
| std::string slot = ""; | |||||
| char *data_ptr = nullptr; | |||||
| unsigned int data_size = 0; | |||||
| int condition = -1; | |||||
| unsigned int watchpoint_id = -1; | |||||
| WatchpointHit hit; | |||||
| debug_services_->CheckSingleWatchpoint(cur_tensor, &name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id); | |||||
| if (name != "") { | |||||
| hit.set_id(watchpoint_id); | |||||
| // here TensorProto act as a tensor indicator, not sending tensor content | |||||
| TensorProto *tensor_item = hit.mutable_tensor(); | |||||
| tensor_item->set_node_name(name); | |||||
| tensor_item->set_slot(slot); | |||||
| tensor_item->set_finished(true); | |||||
| WatchCondition *condition_item = hit.mutable_watch_condition(); | |||||
| condition_item->set_condition(debugger::WatchCondition_Condition(condition)); | |||||
| hits.push_back(hit); | |||||
| } | |||||
| } | |||||
| return hits; | |||||
| } | |||||
| void Debugger::SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points) { | void Debugger::SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points) { | ||||
| // send info about watchpoint | // send info about watchpoint | ||||
| if (!points.empty()) { | if (!points.empty()) { | ||||
| @@ -491,6 +574,24 @@ ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply) { | |||||
| return reply.set_cmd().watch_nodes(); | return reply.set_cmd().watch_nodes(); | ||||
| } | } | ||||
| std::string GetRunLevel(const EventReply &reply) { | |||||
| if (!reply.has_run_cmd()) { | |||||
| MS_LOG(ERROR) << "Error: Not RunCMD, can not get RunLevel. Returning default value: " | |||||
| ""; | |||||
| return ""; | |||||
| } | |||||
| return reply.run_cmd().run_level(); | |||||
| } | |||||
| std::string GetNodeName(const EventReply &reply) { | |||||
| if (!reply.has_run_cmd()) { | |||||
| MS_LOG(ERROR) << "Error: Not RunCMD, can not get NodeName. Returning default value: " | |||||
| ""; | |||||
| return ""; | |||||
| } | |||||
| return reply.run_cmd().node_name(); | |||||
| } | |||||
| WatchCondition GetWatchcondition(const EventReply &reply) { | WatchCondition GetWatchcondition(const EventReply &reply) { | ||||
| if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) { | if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) { | ||||
| MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition()."; | MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition()."; | ||||
| @@ -536,4 +637,20 @@ std::string GetTensorFullName(const TensorProto &tensor) { | |||||
| bool Debugger::partial_memory() { return partial_memory_; } | bool Debugger::partial_memory() { return partial_memory_; } | ||||
| void Debugger::SetCurNode(std::string cur_name) { | |||||
| // access lock for public method | |||||
| std::lock_guard<std::mutex> a_lock(access_lock_); | |||||
| cur_name_ = cur_name; | |||||
| } | |||||
| std::string Debugger::run_level() const { return run_level_; } | |||||
| void Debugger::SetStepNum(int32_t cur_num_step) { | |||||
| // access lock for public method | |||||
| std::lock_guard<std::mutex> a_lock(access_lock_); | |||||
| num_step_ = cur_num_step; | |||||
| } | |||||
| int32_t Debugger::step_num() const { return num_step_; } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -69,6 +69,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||||
| // don't need a graph_ptr because it is saved during pre_execute | // don't need a graph_ptr because it is saved during pre_execute | ||||
| void PostExecute(); | void PostExecute(); | ||||
| bool ReadNodeDataRequired(); | |||||
| void PostExecuteNode(); | |||||
| // suspend the execution after a debug_op | // suspend the execution after a debug_op | ||||
| void PostDebugOp(); | void PostDebugOp(); | ||||
| @@ -78,6 +82,14 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||||
| bool partial_memory(); | bool partial_memory(); | ||||
| void SetCurNode(std::string cur_name); | |||||
| std::string run_level() const; | |||||
| void SetStepNum(int32_t cur_num_step); | |||||
| int32_t step_num() const; | |||||
| private: | private: | ||||
| // private constructor for singleton | // private constructor for singleton | ||||
| Debugger(); | Debugger(); | ||||
| @@ -119,6 +131,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||||
| // analyze tensors and check watchpoint conditions | // analyze tensors and check watchpoint conditions | ||||
| // return names of tensors and what condition they hit | // return names of tensors and what condition they hit | ||||
| std::list<WatchpointHit> CheckWatchpoints() const; | std::list<WatchpointHit> CheckWatchpoints() const; | ||||
| std::list<WatchpointHit> CheckSingleWatchpoint(std::string watchnode) const; | |||||
| // send watchpoints that hit and enter command wait loop | // send watchpoints that hit and enter command wait loop | ||||
| void SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points); | void SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points); | ||||
| @@ -131,6 +144,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> { | |||||
| std::string device_target_; | std::string device_target_; | ||||
| int32_t num_step_; | int32_t num_step_; | ||||
| bool debugger_enabled_; | bool debugger_enabled_; | ||||
| std::string run_level_; | |||||
| std::string node_name_; | |||||
| std::string cur_name_; | |||||
| bool is_dataset_graph_; | bool is_dataset_graph_; | ||||
| bool partial_memory_; | bool partial_memory_; | ||||
| std::mutex access_lock_; | std::mutex access_lock_; | ||||
| @@ -154,6 +170,8 @@ DebuggerCommand GetCommand(const EventReply &reply); | |||||
| // parse other data out of EventReply | // parse other data out of EventReply | ||||
| ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply); | ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply); | ||||
| std::string GetNodeName(const EventReply &reply); | |||||
| std::string GetRunLevel(const EventReply &reply); | |||||
| WatchCondition GetWatchcondition(const EventReply &reply); | WatchCondition GetWatchcondition(const EventReply &reply); | ||||
| int32_t GetWatchpointID(const EventReply &reply); | int32_t GetWatchpointID(const EventReply &reply); | ||||
| bool GetWatchpointDelete(const EventReply &reply); | bool GetWatchpointDelete(const EventReply &reply); | ||||
| @@ -47,6 +47,9 @@ class TensorLoader { | |||||
| } | } | ||||
| tensor_list.push_back(tensor); | tensor_list.push_back(tensor); | ||||
| tensor_list_map.insert({tensor->GetName(), tensor}); | tensor_list_map.insert({tensor->GetName(), tensor}); | ||||
| auto node_name = tensor->GetName(); | |||||
| node_name = node_name.substr(0, node_name.find_first_of(":")); | |||||
| node_tensor_map.insert({node_name, tensor}); | |||||
| return true; | return true; | ||||
| } | } | ||||
| std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; } | std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; } | ||||
| @@ -54,6 +57,17 @@ class TensorLoader { | |||||
| uint32_t GetIterNum() { return iter_num; } | uint32_t GetIterNum() { return iter_num; } | ||||
| std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; } | std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; } | ||||
| std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(std::string node_name) { | |||||
| std::vector<std::shared_ptr<TensorData>> tensors; | |||||
| for (auto itr = node_tensor_map.begin(); itr != node_tensor_map.end(); itr++) { | |||||
| if (itr->first == node_name) { | |||||
| tensors.push_back(itr->second); | |||||
| } | |||||
| } | |||||
| return tensors; | |||||
| } | |||||
| void SearchTensors(const std::vector<std::string> &search_list, | void SearchTensors(const std::vector<std::string> &search_list, | ||||
| std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) { | std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) { | ||||
| for (auto i : search_list) { | for (auto i : search_list) { | ||||
| @@ -70,6 +84,7 @@ class TensorLoader { | |||||
| void EmptyTensor() { | void EmptyTensor() { | ||||
| std::lock_guard<std::mutex> lg(lock_); | std::lock_guard<std::mutex> lg(lock_); | ||||
| prev_tensor_list_map.clear(); | prev_tensor_list_map.clear(); | ||||
| node_tensor_map.clear(); | |||||
| tensor_list_map.swap(prev_tensor_list_map); | tensor_list_map.swap(prev_tensor_list_map); | ||||
| tensor_list.clear(); | tensor_list.clear(); | ||||
| } | } | ||||
| @@ -127,6 +142,7 @@ class TensorLoader { | |||||
| private: | private: | ||||
| std::vector<std::shared_ptr<TensorData>> tensor_list; | std::vector<std::shared_ptr<TensorData>> tensor_list; | ||||
| std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map; | std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map; | ||||
| std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map; | |||||
| std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map; | std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map; | ||||
| uint32_t iter_num; | uint32_t iter_num; | ||||
| std::mutex lock_; | std::mutex lock_; | ||||
| @@ -90,9 +90,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi | |||||
| tensor_data->SetTensor(out_tensor); | tensor_data->SetTensor(out_tensor); | ||||
| tensor_data->SetSlot(slot); | tensor_data->SetSlot(slot); | ||||
| ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); | ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); | ||||
| MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -31,6 +31,9 @@ | |||||
| #include "runtime/device/gpu/gpu_memory_copy_manager.h" | #include "runtime/device/gpu/gpu_memory_copy_manager.h" | ||||
| #include "common/trans.h" | #include "common/trans.h" | ||||
| #include "ir/dtype.h" | #include "ir/dtype.h" | ||||
| #ifdef ENABLE_DEBUGGER | |||||
| #include "debug/debug_services.h" | |||||
| #endif | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| @@ -221,10 +224,46 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | |||||
| const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces, | const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces, | ||||
| const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr, | const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr, | ||||
| bool dump_enabled) { | bool dump_enabled) { | ||||
| if (!(debugger && (debugger->debugger_enabled() || dump_enabled))) { | |||||
| // check if we should read the kernel data | |||||
| bool read_data = false; | |||||
| std::string kernel_name = kernel->fullname_with_scope(); | |||||
| if (debugger) { | |||||
| debugger->SetCurNode(kernel_name); | |||||
| if (dump_enabled) { | |||||
| read_data = true; | |||||
| } else if (debugger->debugger_enabled()) { | |||||
| read_data = debugger->ReadNodeDataRequired(); | |||||
| } | |||||
| } | |||||
| if (!read_data) { | |||||
| return; | return; | ||||
| } | } | ||||
| std::string kernel_name = kernel->fullname_with_scope(); | |||||
| // get inputs | |||||
| if (!dump_enabled) { | |||||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||||
| for (size_t j = 0; j < input_size; ++j) { | |||||
| auto input_kernel = kernel->input(j + 1); | |||||
| std::string input_kernel_name = input_kernel->fullname_with_scope(); | |||||
| auto addr = kernel_inputs[j]; | |||||
| auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX); | |||||
| auto format = kOpFormat_DEFAULT; | |||||
| auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type); | |||||
| string input_tensor_name = input_kernel_name + ':' + "0"; | |||||
| std::vector<int> int_shapes; | |||||
| auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX); | |||||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), | |||||
| [](size_t inner_item) { return SizeToInt(inner_item); }); | |||||
| auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, false); | |||||
| if (!ret) { | |||||
| MS_LOG(ERROR) << "LoadMemToHost:" | |||||
| << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!"; | |||||
| } | |||||
| } | |||||
| } | |||||
| // get outputs | |||||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | ||||
| for (size_t j = 0; j < output_size; ++j) { | for (size_t j = 0; j < output_size; ++j) { | ||||
| auto addr = kernel_outputs[j]; | auto addr = kernel_outputs[j]; | ||||
| @@ -242,11 +281,21 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, | |||||
| << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; | << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; | ||||
| } | } | ||||
| } | } | ||||
| debugger->PostExecuteNode(); | |||||
| } | |||||
| void UpdateStepNum(Debugger *debugger, bool dump_enabled) { | |||||
| if (debugger && (debugger->debugger_enabled() || dump_enabled)) { | |||||
| auto cur_step_num = debugger->step_num(); | |||||
| cur_step_num = cur_step_num + 1; | |||||
| debugger->SetStepNum(cur_step_num); | |||||
| } | |||||
| } | } | ||||
| void LoadParameters(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) { | void LoadParameters(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) { | ||||
| MS_EXCEPTION_IF_NULL(graph); | MS_EXCEPTION_IF_NULL(graph); | ||||
| if (!(debugger && (debugger->debugger_enabled() || dump_enabled))) { | |||||
| if (!(debugger && dump_enabled)) { | |||||
| return; | return; | ||||
| } | } | ||||
| const auto ¶meters = graph->inputs(); | const auto ¶meters = graph->inputs(); | ||||
| @@ -616,9 +665,13 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||||
| #ifdef ENABLE_DEBUGGER | #ifdef ENABLE_DEBUGGER | ||||
| bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration(); | bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration(); | ||||
| if (!mock) { | |||||
| UpdateStepNum(debugger, dump_enabled); | |||||
| } | |||||
| #endif | #endif | ||||
| auto &kernels = graph->execution_order(); | auto &kernels = graph->execution_order(); | ||||
| int exec_order = 1; | int exec_order = 1; | ||||
| for (const auto &kernel : kernels) { | for (const auto &kernel : kernels) { | ||||
| auto kernel_mod = AnfAlgo::GetKernelMod(kernel); | auto kernel_mod = AnfAlgo::GetKernelMod(kernel); | ||||
| MS_EXCEPTION_IF_NULL(kernel_mod); | MS_EXCEPTION_IF_NULL(kernel_mod); | ||||
| @@ -662,7 +715,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De | |||||
| } | } | ||||
| if (!mock) { | if (!mock) { | ||||
| #ifdef ENABLE_DEBUGGER | #ifdef ENABLE_DEBUGGER | ||||
| // collect weights and bias | |||||
| // collect weights and bias for dump mode | |||||
| LoadParameters(graph, debugger, dump_enabled); | LoadParameters(graph, debugger, dump_enabled); | ||||
| #endif | #endif | ||||
| CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); | CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); | ||||