mindspore grpc implementation
fix bugs for grpc implementation
addressed peer review comments
delete device_target code from Adel
add CheckSingleWatchpoint function for node-level debugger
set the device target when sending metadata
add current node name
fix bugs for current node name
fix run_level_ bug
fix bugs for CheckSingleWatchpoint
fix multi-outputs node issue
fix num_step_ bug
fix continue_to previous node issue
fix run_level issue
fix merge conflict
smart kernel read; watch hit stops mid-step; fix step number; read input tensors
clean up the code and isolate the UpdateStepNum function
run cpplint, Cppcheck, and clang-format checks
recover CMakeLists.txt
only update step_num in one place
fix clang-format error
fix CI errors, part 2
update graphengine version
addressed comments

tags/v0.7.0-beta
@@ -171,6 +171,61 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
  }
}

void DebugServices::CheckSingleWatchpoint(std::shared_ptr<TensorData> watchtensor, std::string *name, std::string *slot,
                                          char **data_ptr, unsigned int *data_size, int *condition,
                                          unsigned int *wacthpoint_id) {
  std::lock_guard<std::mutex> lg(lock_);
  std::string current_watchtensor_name;
  current_watchtensor_name = watchtensor->GetName();
  mindspore::tensor::TensorPtr tensor_ptr = watchtensor->GetTensor();
  int tensor_data_type = tensor_ptr->data_type_c();
  watchpoint_t watchpoint_to_check;
  for (auto w_table_item : watchpoint_table) {
    auto check_node_list = std::get<1>(w_table_item).check_node_list;
    for (auto check_node : check_node_list) {
      std::string w_name = std::get<0>(check_node);
      bool w_type = std::get<1>(check_node);
      // get the full watchpoint info (condition, id, ...) for the current watch tensor
      std::string current_node_name = current_watchtensor_name.substr(0, current_watchtensor_name.find_first_of(":"));
      if ((w_type == true && (current_watchtensor_name.find(w_name) != string::npos || w_name == "*")) ||
          (w_type == false && current_node_name == w_name)) {
        watchpoint_to_check = w_table_item.second;
        // need to add support for float16, float64, and other types once we support conditions beyond inf and nan
        if (tensor_data_type != kNumberTypeFloat && tensor_data_type != kNumberTypeFloat32) {
          return;
        }
        break;
      }
    }
  }
  float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c());
  unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float);
  for (unsigned int index = 0; index < num_elements; index++) {
    float x = start_addr[index];
    if (((watchpoint_to_check.conditions.inf.enabled || watchpoint_to_check.conditions.neg_inf.enabled) && isinf(x)) ||
        (watchpoint_to_check.conditions.nan.enabled && isnan(x))) {
      std::string name_no_slot = current_watchtensor_name.substr(0, current_watchtensor_name.find_first_of(":"));
      *name = name_no_slot;
      *slot = std::to_string(watchtensor->GetSlot());
      *data_ptr = reinterpret_cast<char *>(tensor_ptr->data_c());
      *data_size = tensor_ptr->data().nbytes();
      int condition_item = -1;
      if (watchpoint_to_check.conditions.nan.enabled) {
        condition_item = 0;
      } else if (watchpoint_to_check.conditions.inf.enabled || watchpoint_to_check.conditions.neg_inf.enabled) {
        condition_item = 1;
      }
      *condition = condition_item;
      *wacthpoint_id = watchpoint_to_check.id;
    }
  }
}

void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
                                     std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
                                     std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape) {
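For orientation, the core of the new check is a flat inf/nan scan over the tensor's float buffer. A minimal standalone sketch of that scan (plain C++, no MindSpore types; the 0 = nan / 1 = inf condition encoding and its precedence mirror the code above):

```cpp
#include <cmath>
#include <cstddef>

// Returns the condition that fired: 0 = nan, 1 = inf, -1 = no hit,
// using the same precedence as CheckSingleWatchpoint above.
int ScanForInfNan(const float *data, size_t num_elements, bool inf_enabled, bool nan_enabled) {
  for (size_t i = 0; i < num_elements; ++i) {
    const float x = data[i];
    if ((inf_enabled && std::isinf(x)) || (nan_enabled && std::isnan(x))) {
      return nan_enabled ? 0 : 1;
    }
  }
  return -1;
}
```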
@@ -78,6 +78,9 @@ class DebugServices {
                         std::vector<unsigned int> *data_size, std::vector<int> *condition,
                         std::vector<unsigned int> *wacthpoint_id);
  void CheckSingleWatchpoint(std::shared_ptr<TensorData> watchnode, std::string *name, std::string *slot,
                             char **data_ptr, unsigned int *data_size, int *condition, unsigned int *wacthpoint_id);
  void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
                        std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
                        std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape);
@@ -31,6 +31,10 @@ service EventListener {

message Metadata {
  string device_name = 1;
  int32 cur_step = 2;
  // the backend is either 'GPU' or 'Ascend'
  string backend = 3;
  // the full name of the current node
  string cur_node = 4;
}

message EventReply {

@@ -44,12 +48,22 @@ message EventReply {
  oneof cmd {
    bool exit = 2;
-   int32 run_cmd = 3;
    RunCMD run_cmd = 3;
    SetCMD set_cmd = 4;
    ViewCMD view_cmd = 5;
  }
}

message RunCMD {
  // run level: either "step" or "node"
  string run_level = 1;
  oneof cmd {
    int32 run_steps = 2;
    // the full name of the next node
    string node_name = 3;
  }
}

message SetCMD {
  repeated WatchNode watch_nodes = 1;
  WatchCondition watch_condition = 2;
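As a usage sketch, a debugger front end could fill the new node-level run command through the protoc-generated C++ accessors. The accessor names below follow standard protobuf code generation for the fields above; the generated header name is an assumption:

```cpp
#include <string>
#include "debugger.pb.h"  // assumed name of the header generated from this .proto

debugger::EventReply MakeContinueToNodeReply(const std::string &node_full_name) {
  debugger::EventReply reply;
  debugger::RunCMD *run = reply.mutable_run_cmd();  // selects run_cmd in the cmd oneof
  run->set_run_level("node");                       // "step" or "node"
  run->set_node_name(node_full_name);               // full name of the node to continue to
  return reply;
}
```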
@@ -45,6 +45,9 @@ Debugger::Debugger()
      device_target_(""),
      num_step_(0),
      debugger_enabled_(false),
      run_level_(""),
      node_name_(""),
      cur_name_(""),
      is_dataset_graph_(false),
      partial_memory_(false) {}

@@ -164,10 +167,46 @@ void Debugger::PostExecute() {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
  // analyze tensor data and send any watchpoints that were hit
  if (run_level_ == "node") {
    MS_LOG(INFO) << "Debugger is in node level mode ";
    return;
  }
  if (debugger_enabled_ && !is_dataset_graph_) {
    num_step_++;
    MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
    SendWatchpointsAndSuspend(CheckWatchpoints());
    CommandLoop();
  }
}

bool Debugger::ReadNodeDataRequired() {
  if (debugger_enabled_ && !is_dataset_graph_) {
    auto watchpoint_table = debug_services_->GetWatchpointTable();
    auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
    // if the node has a watchpoint on it, or is the next_to / continue_to node, read the kernel tensor data
    if (is_watchpoint || (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_))) {
      return true;
    }
  }
  return false;
}

void Debugger::PostExecuteNode() {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
  if (debugger_enabled_ && !is_dataset_graph_) {
    auto watchpoint_table = debug_services_->GetWatchpointTable();
    auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
    // if the kernel is a watchpoint and gets hit, suspend
    if (is_watchpoint) {
      auto hits = CheckSingleWatchpoint(cur_name_);
      if (!hits.empty()) {
        SendWatchpointsAndSuspend(hits);
      }
    }
    // if the kernel is not a watchpoint but is the next_to or continue_to node, suspend
    if (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) {
      CommandLoop();
    }
    return;
  }
}
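Taken together, the new hooks are intended to be driven once per kernel by the device runtime, which is what the GPU runtime changes later in this diff do. A condensed, illustrative sketch of that call order (the launch and load helpers are hypothetical placeholders, not MindSpore APIs):

```cpp
// Illustrative only; assumes a Debugger instance and a kernel node as in LoadKernelData below.
void RunOneKernelWithDebugger(Debugger *debugger, const CNodePtr &kernel) {
  debugger->SetCurNode(kernel->fullname_with_scope());  // record which node is executing
  LaunchKernelOnDevice(kernel);                         // hypothetical: launch the kernel
  if (debugger->ReadNodeDataRequired()) {
    CopyKernelTensorsToHost(kernel);                    // hypothetical: load inputs/outputs to host
  }
  debugger->PostExecuteNode();                          // check watchpoints and possibly suspend
}
```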
@@ -232,6 +271,8 @@ void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
  Metadata metadata;
  metadata.set_device_name(device_name);
  metadata.set_cur_step(num_step_);
  metadata.set_backend(device_target_);
  metadata.set_cur_node(cur_name_);
  EventReply reply_metadata = grpc_client_->SendMetadata(metadata);
  if (reply_metadata.status() != reply_metadata.OK) {
    MS_LOG(ERROR) << "Error: SendMetadata failed";

@@ -249,8 +290,11 @@ void Debugger::CommandLoop() {
  // prepare metadata
  std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
  Metadata metadata;
  metadata.set_device_name(device_name);
  metadata.set_cur_step(num_step_);
  metadata.set_backend(device_target_);
  metadata.set_cur_node(cur_name_);
  // loop exit flag
  bool run = false;

@@ -291,6 +335,16 @@ void Debugger::CommandLoop() {
      break;
    case DebuggerCommand::kRunCMD:
      MS_LOG(INFO) << "RunCMD";
      {
        // record the run command content: run_level and node_name
        run_level_ = GetRunLevel(reply);
        node_name_ = GetNodeName(reply);
        MS_LOG(INFO) << "run_level: " << run_level_;
        MS_LOG(INFO) << "node_name_: " << node_name_;
      }
      // exit loop
      run = true;
      break;
@@ -445,6 +499,35 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints() const {
  return hits;
}

std::list<WatchpointHit> Debugger::CheckSingleWatchpoint(std::string watchnode) const {
  auto tensor_loader = debug_services_->tensor_loader();
  auto tensors = tensor_loader->GetNodeTensorMap(watchnode);
  std::list<WatchpointHit> hits;
  for (std::vector<std::shared_ptr<TensorData>>::iterator it = tensors.begin(); it != tensors.end(); ++it) {
    auto cur_tensor = *it;
    std::string name = "";
    std::string slot = "";
    char *data_ptr = nullptr;
    unsigned int data_size = 0;
    int condition = -1;
    unsigned int watchpoint_id = -1;
    WatchpointHit hit;
    debug_services_->CheckSingleWatchpoint(cur_tensor, &name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id);
    if (name != "") {
      hit.set_id(watchpoint_id);
      // here TensorProto acts only as a tensor identifier; the tensor content is not sent
      TensorProto *tensor_item = hit.mutable_tensor();
      tensor_item->set_node_name(name);
      tensor_item->set_slot(slot);
      tensor_item->set_finished(true);
      WatchCondition *condition_item = hit.mutable_watch_condition();
      condition_item->set_condition(debugger::WatchCondition_Condition(condition));
      hits.push_back(hit);
    }
  }
  return hits;
}

void Debugger::SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points) {
  // send info about watchpoint
  if (!points.empty()) {
@@ -491,6 +574,24 @@ ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply) {
  return reply.set_cmd().watch_nodes();
}

std::string GetRunLevel(const EventReply &reply) {
  if (!reply.has_run_cmd()) {
    MS_LOG(ERROR) << "Error: Not RunCMD, can not get RunLevel. Returning default value: "
                     "";
    return "";
  }
  return reply.run_cmd().run_level();
}

std::string GetNodeName(const EventReply &reply) {
  if (!reply.has_run_cmd()) {
    MS_LOG(ERROR) << "Error: Not RunCMD, can not get NodeName. Returning default value: "
                     "";
    return "";
  }
  return reply.run_cmd().node_name();
}
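For illustration, these two parsers are meant to be applied to a reply whose command is `kRunCMD`, as in the `CommandLoop` change above; a minimal sketch:

```cpp
// Sketch: extract the node-level run parameters from a received reply.
void ApplyRunCommand(const EventReply &reply, std::string *run_level, std::string *node_name) {
  if (GetCommand(reply) == DebuggerCommand::kRunCMD) {
    *run_level = GetRunLevel(reply);  // "step" or "node"
    *node_name = GetNodeName(reply);  // empty unless a target node was specified
  }
}
```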

WatchCondition GetWatchcondition(const EventReply &reply) {
  if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) {
    MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition().";
@@ -536,4 +637,20 @@ std::string GetTensorFullName(const TensorProto &tensor) {
bool Debugger::partial_memory() { return partial_memory_; }

void Debugger::SetCurNode(std::string cur_name) {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
  cur_name_ = cur_name;
}

std::string Debugger::run_level() const { return run_level_; }

void Debugger::SetStepNum(int32_t cur_num_step) {
  // access lock for public method
  std::lock_guard<std::mutex> a_lock(access_lock_);
  num_step_ = cur_num_step;
}

int32_t Debugger::step_num() const { return num_step_; }
}  // namespace mindspore
@@ -69,6 +69,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  // don't need a graph_ptr because it is saved during pre_execute
  void PostExecute();

  bool ReadNodeDataRequired();

  void PostExecuteNode();

  // suspend the execution after a debug_op
  void PostDebugOp();

@@ -78,6 +82,14 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  bool partial_memory();

  void SetCurNode(std::string cur_name);

  std::string run_level() const;

  void SetStepNum(int32_t cur_num_step);

  int32_t step_num() const;

 private:
  // private constructor for singleton
  Debugger();

@@ -119,6 +131,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  // analyze tensors and check watchpoint conditions
  // return names of tensors and what condition they hit
  std::list<WatchpointHit> CheckWatchpoints() const;
  std::list<WatchpointHit> CheckSingleWatchpoint(std::string watchnode) const;

  // send watchpoints that hit and enter command wait loop
  void SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points);

@@ -131,6 +144,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
  std::string device_target_;
  int32_t num_step_;
  bool debugger_enabled_;
  std::string run_level_;
  std::string node_name_;
  std::string cur_name_;
  bool is_dataset_graph_;
  bool partial_memory_;
  std::mutex access_lock_;

@@ -154,6 +170,8 @@ DebuggerCommand GetCommand(const EventReply &reply);
// parse other data out of EventReply
ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply);
std::string GetNodeName(const EventReply &reply);
std::string GetRunLevel(const EventReply &reply);
WatchCondition GetWatchcondition(const EventReply &reply);
int32_t GetWatchpointID(const EventReply &reply);
bool GetWatchpointDelete(const EventReply &reply);
@@ -47,6 +47,9 @@ class TensorLoader {
    }
    tensor_list.push_back(tensor);
    tensor_list_map.insert({tensor->GetName(), tensor});
    auto node_name = tensor->GetName();
    node_name = node_name.substr(0, node_name.find_first_of(":"));
    node_tensor_map.insert({node_name, tensor});
    return true;
  }

  std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; }

@@ -54,6 +57,17 @@ class TensorLoader {
  uint32_t GetIterNum() { return iter_num; }

  std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; }

  std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(std::string node_name) {
    std::vector<std::shared_ptr<TensorData>> tensors;
    for (auto itr = node_tensor_map.begin(); itr != node_tensor_map.end(); itr++) {
      if (itr->first == node_name) {
        tensors.push_back(itr->second);
      }
    }
    return tensors;
  }

  void SearchTensors(const std::vector<std::string> &search_list,
                     std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) {
    for (auto i : search_list) {

@@ -70,6 +84,7 @@ class TensorLoader {
  void EmptyTensor() {
    std::lock_guard<std::mutex> lg(lock_);
    prev_tensor_list_map.clear();
    node_tensor_map.clear();
    tensor_list_map.swap(prev_tensor_list_map);
    tensor_list.clear();
  }

@@ -127,6 +142,7 @@ class TensorLoader {
 private:
  std::vector<std::shared_ptr<TensorData>> tensor_list;
  std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map;
  std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map;
  std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map;
  uint32_t iter_num;
  std::mutex lock_;
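Since `node_tensor_map` is a `std::multimap` keyed by node name, the linear scan in `GetNodeTensorMap` could equivalently use `equal_range`, which returns exactly the entries for one key. A drop-in sketch of that variant (same result, logarithmic key lookup):

```cpp
std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(const std::string &node_name) {
  std::vector<std::shared_ptr<TensorData>> tensors;
  // equal_range yields [first, second) covering every entry whose key equals node_name
  auto range = node_tensor_map.equal_range(node_name);
  for (auto itr = range.first; itr != range.second; ++itr) {
    tensors.push_back(itr->second);
  }
  return tensors;
}
```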
@@ -90,9 +90,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
  tensor_data->SetTensor(out_tensor);
  tensor_data->SetSlot(slot);
  ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev);
-  MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
  return ret;
}
#endif
@@ -31,6 +31,9 @@
#include "runtime/device/gpu/gpu_memory_copy_manager.h"
#include "common/trans.h"
#include "ir/dtype.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debug_services.h"
#endif

namespace mindspore {
namespace device {

@@ -221,10 +224,46 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
                    const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces,
                    const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr,
                    bool dump_enabled) {
-  if (!(debugger && (debugger->debugger_enabled() || dump_enabled))) {
  // check if we should read the kernel data
  bool read_data = false;
  std::string kernel_name = kernel->fullname_with_scope();
  if (debugger) {
    debugger->SetCurNode(kernel_name);
    if (dump_enabled) {
      read_data = true;
    } else if (debugger->debugger_enabled()) {
      read_data = debugger->ReadNodeDataRequired();
    }
  }
  if (!read_data) {
    return;
  }
-  std::string kernel_name = kernel->fullname_with_scope();

  // get inputs
  if (!dump_enabled) {
    auto input_size = AnfAlgo::GetInputTensorNum(kernel);
    for (size_t j = 0; j < input_size; ++j) {
      auto input_kernel = kernel->input(j + 1);
      std::string input_kernel_name = input_kernel->fullname_with_scope();
      auto addr = kernel_inputs[j];
      auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
      auto format = kOpFormat_DEFAULT;
      auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
      string input_tensor_name = input_kernel_name + ':' + "0";
      std::vector<int> int_shapes;
      auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
      auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, false);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost:"
                      << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
      }
    }
  }

  // get outputs
  auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
  for (size_t j = 0; j < output_size; ++j) {
    auto addr = kernel_outputs[j];
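Both the input loop above and `TensorLoader` rely on the `"<node full name>:<slot>"` tensor-name convention (`input_kernel_name + ':' + "0"` here, `find_first_of(":")` on the loader side). A small illustrative helper pair for that convention (not part of the diff):

```cpp
#include <string>
#include <utility>

// Compose a tensor name as "<node full name>:<slot>".
std::string MakeTensorName(const std::string &node_full_name, size_t slot) {
  return node_full_name + ":" + std::to_string(slot);
}

// Split a tensor name back into {node full name, slot text}; slot text is empty if no ':' is present.
std::pair<std::string, std::string> SplitTensorName(const std::string &tensor_name) {
  const auto pos = tensor_name.find_first_of(':');
  if (pos == std::string::npos) {
    return {tensor_name, ""};
  }
  return {tensor_name.substr(0, pos), tensor_name.substr(pos + 1)};
}
```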
@@ -242,11 +281,21 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
    }
  }
  debugger->PostExecuteNode();
}

void UpdateStepNum(Debugger *debugger, bool dump_enabled) {
  if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
    auto cur_step_num = debugger->step_num();
    cur_step_num = cur_step_num + 1;
    debugger->SetStepNum(cur_step_num);
  }
}

void LoadParameters(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) {
  MS_EXCEPTION_IF_NULL(graph);
-  if (!(debugger && (debugger->debugger_enabled() || dump_enabled))) {
  if (!(debugger && dump_enabled)) {
    return;
  }
  const auto &parameters = graph->inputs();

@@ -616,9 +665,13 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
#ifdef ENABLE_DEBUGGER
  bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration();
  if (!mock) {
    UpdateStepNum(debugger, dump_enabled);
  }
#endif
  auto &kernels = graph->execution_order();
  int exec_order = 1;
  for (const auto &kernel : kernels) {
    auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
    MS_EXCEPTION_IF_NULL(kernel_mod);

@@ -662,7 +715,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
  }
  if (!mock) {
#ifdef ENABLE_DEBUGGER
-    // collect weights and bias
    // collect weights and bias for dump mode
    LoadParameters(graph, debugger, dump_enabled);
#endif
    CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed.");
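Putting the runtime-side pieces together, one debugger-enabled iteration roughly follows the order below. This is a hedged summary sketch of the flow implied by the hunks above, not the actual `LaunchKernelDynamic` body:

```cpp
// Hedged sketch of one debugger-enabled iteration on the GPU path.
// LaunchKernel is a hypothetical stand-in for the real kernel launch; error handling is omitted.
void RunOneIterationSketch(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) {
  UpdateStepNum(debugger, dump_enabled);  // the step counter is now bumped in exactly one place
  for (const auto &kernel : graph->execution_order()) {
    LaunchKernel(kernel);  // hypothetical stand-in for the real launch call
    // LoadKernelData(...) then reads inputs/outputs when ReadNodeDataRequired() allows it
    // and finishes with debugger->PostExecuteNode(), as shown in the hunks above.
  }
  LoadParameters(graph, debugger, dump_enabled);  // weights and bias, dump mode only
  debugger->PostExecute();  // step-level watchpoint check, once the whole step has run
}
```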